VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 99654

Last change on this file since 99654 was 99245, checked in by vboxsync, 21 months ago

VMM/PGM: Nested VMX: bugref:10318 Reverted r156627.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 224.7 KB
Line 
1/* $Id: PGMAllPool.cpp 99245 2023-03-31 06:14:22Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/vmm/hm_vmx.h>
42
43#include <VBox/log.h>
44#include <VBox/err.h>
45#include <iprt/asm.h>
46#include <iprt/string.h>
47
48
49/*********************************************************************************************************************************
50* Internal Functions *
51*********************************************************************************************************************************/
52RT_C_DECLS_BEGIN
53#if 0 /* unused */
54DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
55DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
56#endif /* unused */
57static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
58static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
62static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
63#endif
64#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
65static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
66#endif
67
68int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
69PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
70void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
71void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
72
73RT_C_DECLS_END
74
75
76#if 0 /* unused */
77/**
78 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
79 *
80 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
81 * @param enmKind The page kind.
82 */
83DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
84{
85 switch (enmKind)
86 {
87 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
88 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
90 return true;
91 default:
92 return false;
93 }
94}
95#endif /* unused */
96
97
98/**
99 * Flushes a chain of pages sharing the same access monitor.
100 *
101 * @param pPool The pool.
102 * @param pPage A page in the chain.
103 */
104void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
105{
106 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
107
108 /*
109 * Find the list head.
110 */
111 uint16_t idx = pPage->idx;
112 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
113 {
114 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
115 {
116 idx = pPage->iMonitoredPrev;
117 Assert(idx != pPage->idx);
118 pPage = &pPool->aPages[idx];
119 }
120 }
121
122 /*
123 * Iterate the list flushing each shadow page.
124 */
125 for (;;)
126 {
127 idx = pPage->iMonitoredNext;
128 Assert(idx != pPage->idx);
129 if (pPage->idx >= PGMPOOL_IDX_FIRST)
130 {
131 int rc2 = pgmPoolFlushPage(pPool, pPage);
132 AssertRC(rc2);
133 }
134 /* next */
135 if (idx == NIL_PGMPOOL_IDX)
136 break;
137 pPage = &pPool->aPages[idx];
138 }
139}
140
141
142/**
143 * Wrapper for getting the current context pointer to the entry being modified.
144 *
145 * @returns VBox status code suitable for scheduling.
146 * @param pVM The cross context VM structure.
147 * @param pvDst Destination address
148 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
149 * on the context (e.g. \#PF in R0 & RC).
150 * @param GCPhysSrc The source guest physical address.
151 * @param cb Size of data to read
152 */
153DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
154{
155#if defined(IN_RING3)
156 NOREF(pVM); NOREF(GCPhysSrc);
157 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
158 return VINF_SUCCESS;
159#else
160 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
161 NOREF(pvSrc);
162 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
163#endif
164}
165
166
167/**
168 * Process shadow entries before they are changed by the guest.
169 *
170 * For PT entries we will clear them. For PD entries, we'll simply check
171 * for mapping conflicts and set the SyncCR3 FF if found.
172 *
173 * @param pVCpu The cross context virtual CPU structure.
174 * @param pPool The pool.
175 * @param pPage The head page.
176 * @param GCPhysFault The guest physical fault address.
177 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
178 * depending on the context (e.g. \#PF in R0 & RC).
179 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
180 */
181static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
182 void const *pvAddress, unsigned cbWrite)
183{
184 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
185 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
186 PVMCC pVM = pPool->CTX_SUFF(pVM);
187 NOREF(pVCpu);
188
189 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
190 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
191
192 if (PGMPOOL_PAGE_IS_NESTED(pPage))
193 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
194
195 for (;;)
196 {
197 union
198 {
199 void *pv;
200 PX86PT pPT;
201 PPGMSHWPTPAE pPTPae;
202 PX86PD pPD;
203 PX86PDPAE pPDPae;
204 PX86PDPT pPDPT;
205 PX86PML4 pPML4;
206#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
207 PEPTPDPT pEptPdpt;
208 PEPTPD pEptPd;
209 PEPTPT pEptPt;
210#endif
211 } uShw;
212
213 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
214 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
215
216 uShw.pv = NULL;
217 switch (pPage->enmKind)
218 {
219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
220 {
221 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
222 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
223 const unsigned iShw = off / sizeof(X86PTE);
224 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
225 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
226 if (uPde & X86_PTE_P)
227 {
228 X86PTE GstPte;
229 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
230 AssertRC(rc);
231 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
232 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
233 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
234 }
235 break;
236 }
237
238 /* page/2 sized */
239 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
240 {
241 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
242 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
243 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
244 {
245 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
246 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
247 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
248 {
249 X86PTE GstPte;
250 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
251 AssertRC(rc);
252
253 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
254 pgmPoolTracDerefGCPhysHint(pPool, pPage,
255 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
256 GstPte.u & X86_PTE_PG_MASK,
257 iShw);
258 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
259 }
260 }
261 break;
262 }
263
264 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
265 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
268 {
269 unsigned iGst = off / sizeof(X86PDE);
270 unsigned iShwPdpt = iGst / 256;
271 unsigned iShw = (iGst % 256) * 2;
272 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
273
274 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
275 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
276 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
277 {
278 for (unsigned i = 0; i < 2; i++)
279 {
280 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
281 if (uPde & X86_PDE_P)
282 {
283 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
284 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
296 if (uPde2 & X86_PDE_P)
297 {
298 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
299 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
300 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
301 }
302 }
303 }
304 }
305 }
306 break;
307 }
308
309 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
310 {
311 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
312 const unsigned iShw = off / sizeof(X86PTEPAE);
313 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
314 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
315 {
316 X86PTEPAE GstPte;
317 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
318 AssertRC(rc);
319
320 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
321 pgmPoolTracDerefGCPhysHint(pPool, pPage,
322 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
323 GstPte.u & X86_PTE_PAE_PG_MASK,
324 iShw);
325 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
326 }
327
328 /* paranoia / a bit assumptive. */
329 if ( (off & 7)
330 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
331 {
332 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
333 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
334
335 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
336 {
337 X86PTEPAE GstPte;
338 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
339 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
340 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
341 AssertRC(rc);
342 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
343 pgmPoolTracDerefGCPhysHint(pPool, pPage,
344 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
345 GstPte.u & X86_PTE_PAE_PG_MASK,
346 iShw2);
347 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
348 }
349 }
350 break;
351 }
352
353 case PGMPOOLKIND_32BIT_PD:
354 {
355 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
356 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
357
358 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
359 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
360 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
361 if (uPde & X86_PDE_P)
362 {
363 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
364 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
365 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
366 }
367
368 /* paranoia / a bit assumptive. */
369 if ( (off & 3)
370 && (off & 3) + cbWrite > sizeof(X86PTE))
371 {
372 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
373 if ( iShw2 != iShw
374 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
375 {
376 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
377 if (uPde2 & X86_PDE_P)
378 {
379 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
380 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
381 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
382 }
383 }
384 }
385#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
386 if ( uShw.pPD->a[iShw].n.u1Present
387 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393#endif
394 break;
395 }
396
397 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
398 {
399 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
400 const unsigned iShw = off / sizeof(X86PDEPAE);
401 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
402
403 /*
404 * Causes trouble when the guest uses a PDE to refer to the whole page table level
405 * structure. (Invalidate here; faults later on when it tries to change the page
406 * table entries -> recheck; probably only applies to the RC case.)
407 */
408 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
409 if (uPde & X86_PDE_P)
410 {
411 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
412 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
413 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
414 }
415
416 /* paranoia / a bit assumptive. */
417 if ( (off & 7)
418 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
419 {
420 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
421 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
422
423 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
424 if (uPde2 & X86_PDE_P)
425 {
426 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
427 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
428 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
429 }
430 }
431 break;
432 }
433
434 case PGMPOOLKIND_PAE_PDPT:
435 {
436 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
437 /*
438 * Hopefully this doesn't happen very often:
439 * - touching unused parts of the page
440 * - messing with the bits of pd pointers without changing the physical address
441 */
442 /* PDPT roots are not page aligned; 32 byte only! */
443 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
444
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = offPdpt / sizeof(X86PDPE);
447 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
448 {
449 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
450 if (uPdpe & X86_PDPE_P)
451 {
452 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
453 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
455 }
456
457 /* paranoia / a bit assumptive. */
458 if ( (offPdpt & 7)
459 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
460 {
461 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
462 if ( iShw2 != iShw
463 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
464 {
465 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
466 if (uPdpe2 & X86_PDPE_P)
467 {
468 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
469 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
470 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
471 }
472 }
473 }
474 }
475 break;
476 }
477
478 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
479 {
480 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
481 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
482 const unsigned iShw = off / sizeof(X86PDEPAE);
483 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
484 if (uPde & X86_PDE_P)
485 {
486 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
487 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
488 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
489 }
490
491 /* paranoia / a bit assumptive. */
492 if ( (off & 7)
493 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
494 {
495 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
496 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
497 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
498 if (uPde2 & X86_PDE_P)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
501 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
502 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
503 }
504 }
505 break;
506 }
507
508 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
509 {
510 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
511 /*
512 * Hopefully this doesn't happen very often:
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
516 const unsigned iShw = off / sizeof(X86PDPE);
517 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
518 if (uPdpe & X86_PDPE_P)
519 {
520 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
521 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
522 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
523 }
524 /* paranoia / a bit assumptive. */
525 if ( (off & 7)
526 && (off & 7) + cbWrite > sizeof(X86PDPE))
527 {
528 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
529 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
530 if (uPdpe2 & X86_PDPE_P)
531 {
532 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
533 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
534 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
535 }
536 }
537 break;
538 }
539
540 case PGMPOOLKIND_64BIT_PML4:
541 {
542 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
543 /*
544 * Hopefully this doesn't happen very often:
545 * - messing with the bits of pd pointers without changing the physical address
546 */
547 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
548 const unsigned iShw = off / sizeof(X86PDPE);
549 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
550 if (uPml4e & X86_PML4E_P)
551 {
552 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
553 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
554 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
555 }
556 /* paranoia / a bit assumptive. */
557 if ( (off & 7)
558 && (off & 7) + cbWrite > sizeof(X86PDPE))
559 {
560 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
561 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
562 if (uPml4e2 & X86_PML4E_P)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
565 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
566 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
567 }
568 }
569 break;
570 }
571
572#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
573 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
574 {
575 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
576 const unsigned iShw = off / sizeof(EPTPML4E);
577 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
578 if (uPml4e & EPT_PRESENT_MASK)
579 {
580 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
581 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
582 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
583 }
584
585 /* paranoia / a bit assumptive. */
586 if ( (off & 7)
587 && (off & 7) + cbWrite > sizeof(X86PML4E))
588 {
589 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
590 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
591 if (uPml4e2 & EPT_PRESENT_MASK)
592 {
593 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
594 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
595 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
596 }
597 }
598 break;
599 }
600
601 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
602 {
603 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
604 const unsigned iShw = off / sizeof(EPTPDPTE);
605 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
606 if (uPdpte & EPT_PRESENT_MASK)
607 {
608 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
609 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
610 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
611 }
612
613 /* paranoia / a bit assumptive. */
614 if ( (off & 7)
615 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
616 {
617 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
618 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
619 if (uPdpte2 & EPT_PRESENT_MASK)
620 {
621 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
622 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
623 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
624 }
625 }
626 break;
627 }
628
629 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
630 {
631 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
632 const unsigned iShw = off / sizeof(EPTPDE);
633 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
634 if (uPde & EPT_PRESENT_MASK)
635 {
636 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
637 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
638 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
639 }
640
641 /* paranoia / a bit assumptive. */
642 if ( (off & 7)
643 && (off & 7) + cbWrite > sizeof(EPTPDE))
644 {
645 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
646 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
647 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
648 if (uPde2 & EPT_PRESENT_MASK)
649 {
650 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
651 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
652 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
653 }
654 }
655 break;
656 }
657
658 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
659 {
660 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
661 const unsigned iShw = off / sizeof(EPTPTE);
662 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
663 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
664 if (uPte & EPT_PRESENT_MASK)
665 {
666 EPTPTE GstPte;
667 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
668 AssertRC(rc);
669
670 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
671 pgmPoolTracDerefGCPhysHint(pPool, pPage,
672 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
673 GstPte.u & EPT_PTE_PG_MASK,
674 iShw);
675 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
676 }
677
678 /* paranoia / a bit assumptive. */
679 if ( (off & 7)
680 && (off & 7) + cbWrite > sizeof(EPTPTE))
681 {
682 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
683 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
684 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
685 if (uPte2 & EPT_PRESENT_MASK)
686 {
687 EPTPTE GstPte;
688 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
689 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
690 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
691 AssertRC(rc);
692 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
693 pgmPoolTracDerefGCPhysHint(pPool, pPage,
694 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
695 GstPte.u & EPT_PTE_PG_MASK,
696 iShw2);
697 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
698 }
699 }
700 break;
701 }
702#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
703
704 default:
705 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
706 }
707 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
708
709 /* next */
710 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
711 return;
712 pPage = &pPool->aPages[pPage->iMonitoredNext];
713 }
714}
715
716#ifndef IN_RING3
717
718/**
719 * Checks if a access could be a fork operation in progress.
720 *
721 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
722 *
723 * @returns true if it's likely that we're forking, otherwise false.
724 * @param pPool The pool.
725 * @param pDis The disassembled instruction.
726 * @param offFault The access offset.
727 */
728DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISSTATE pDis, unsigned offFault)
729{
730 /*
731 * i386 linux is using btr to clear X86_PTE_RW.
732 * The functions involved are (2.6.16 source inspection):
733 * clear_bit
734 * ptep_set_wrprotect
735 * copy_one_pte
736 * copy_pte_range
737 * copy_pmd_range
738 * copy_pud_range
739 * copy_page_range
740 * dup_mmap
741 * dup_mm
742 * copy_mm
743 * copy_process
744 * do_fork
745 */
746 if ( pDis->pCurInstr->uOpcode == OP_BTR
747 && !(offFault & 4)
748 /** @todo Validate that the bit index is X86_PTE_RW. */
749 )
750 {
751 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
752 return true;
753 }
754 return false;
755}
756
757
758/**
759 * Determine whether the page is likely to have been reused.
760 *
761 * @returns true if we consider the page as being reused for a different purpose.
762 * @returns false if we consider it to still be a paging page.
763 * @param pVM The cross context VM structure.
764 * @param pVCpu The cross context virtual CPU structure.
765 * @param pCtx Pointer to the register context for the CPU.
766 * @param pDis The disassembly info for the faulting instruction.
767 * @param pvFault The fault address.
768 * @param pPage The pool page being accessed.
769 *
770 * @remark The REP prefix check is left to the caller because of STOSD/W.
771 */
772DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISSTATE pDis, RTGCPTR pvFault,
773 PPGMPOOLPAGE pPage)
774{
775 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
776 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
777 if (pPage->cLocked)
778 {
779 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been resued, because it's locked!\n", pvFault, pPage));
780 return false;
781 }
782
783 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
784 if ( HMHasPendingIrq(pVM)
785 && pCtx->rsp - pvFault < 32)
786 {
787 /* Fault caused by stack writes while trying to inject an interrupt event. */
788 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pCtx->rsp));
789 return true;
790 }
791
792 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pCtx->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.arch.x86.Base.idxGenReg));
793
794 /* Non-supervisor mode write means it's used for something else. */
795 if (CPUMGetGuestCPL(pVCpu) == 3)
796 return true;
797
798 switch (pDis->pCurInstr->uOpcode)
799 {
800 /* call implies the actual push of the return address faulted */
801 case OP_CALL:
802 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
803 return true;
804 case OP_PUSH:
805 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
806 return true;
807 case OP_PUSHF:
808 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
809 return true;
810 case OP_PUSHA:
811 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
812 return true;
813 case OP_FXSAVE:
814 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
815 return true;
816 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
817 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
818 return true;
819 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
820 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
821 return true;
822 case OP_MOVSWD:
823 case OP_STOSWD:
824 if ( pDis->arch.x86.fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
825 && pCtx->rcx >= 0x40
826 )
827 {
828 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
829
830 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
831 return true;
832 }
833 break;
834
835 default:
836 /*
837 * Anything having ESP on the left side means stack writes.
838 */
839 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
840 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
841 && (pDis->Param1.arch.x86.Base.idxGenReg == DISGREG_ESP))
842 {
843 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
844 return true;
845 }
846 break;
847 }
848
849 /*
850 * Page table updates are very very unlikely to be crossing page boundraries,
851 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
852 */
853 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
854 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
855 {
856 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
857 return true;
858 }
859
860 /*
861 * Nobody does an unaligned 8 byte write to a page table, right.
862 */
863 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
864 {
865 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
866 return true;
867 }
868
869 return false;
870}
871
872
873/**
874 * Flushes the page being accessed.
875 *
876 * @returns VBox status code suitable for scheduling.
877 * @param pVM The cross context VM structure.
878 * @param pVCpu The cross context virtual CPU structure.
879 * @param pPool The pool.
880 * @param pPage The pool page (head).
881 * @param pDis The disassembly of the write instruction.
882 * @param pCtx Pointer to the register context for the CPU.
883 * @param GCPhysFault The fault address as guest physical address.
884 * @todo VBOXSTRICTRC
885 */
886static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
887 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
888{
889 NOREF(pVM); NOREF(GCPhysFault);
890
891 /*
892 * First, do the flushing.
893 */
894 pgmPoolMonitorChainFlush(pPool, pPage);
895
896 /*
897 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
898 * Must do this in raw mode (!); XP boot will fail otherwise.
899 */
900 int rc = VINF_SUCCESS;
901 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
902 if (rc2 == VINF_SUCCESS)
903 { /* do nothing */ }
904 else if (rc2 == VINF_EM_RESCHEDULE)
905 {
906 rc = VBOXSTRICTRC_VAL(rc2);
907# ifndef IN_RING3
908 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
909# endif
910 }
911 else if (rc2 == VERR_EM_INTERPRETER)
912 {
913 rc = VINF_EM_RAW_EMULATE_INSTR;
914 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
915 }
916 else if (RT_FAILURE_NP(rc2))
917 rc = VBOXSTRICTRC_VAL(rc2);
918 else
919 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
920
921 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
922 return rc;
923}
924
925
926/**
927 * Handles the STOSD write accesses.
928 *
929 * @returns VBox status code suitable for scheduling.
930 * @param pVM The cross context VM structure.
931 * @param pPool The pool.
932 * @param pPage The pool page (head).
933 * @param pDis The disassembly of the write instruction.
934 * @param pCtx Pointer to the register context for the CPU.
935 * @param GCPhysFault The fault address as guest physical address.
936 * @param pvFault The fault address.
937 */
938DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
939 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
940{
941 unsigned uIncrement = pDis->Param1.arch.x86.cb;
942 NOREF(pVM);
943
944 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
945 Assert(pCtx->rcx <= 0x20);
946
947# ifdef VBOX_STRICT
948 if (pDis->arch.x86.uOpMode == DISCPUMODE_32BIT)
949 Assert(uIncrement == 4);
950 else
951 Assert(uIncrement == 8);
952# endif
953
954 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
955
956 /*
957 * Increment the modification counter and insert it into the list
958 * of modified pages the first time.
959 */
960 if (!pPage->cModifications++)
961 pgmPoolMonitorModifiedInsert(pPool, pPage);
962
963 /*
964 * Execute REP STOSD.
965 *
966 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
967 * write situation, meaning that it's safe to write here.
968 */
969 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
970 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
971 while (pCtx->rcx)
972 {
973 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
974 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pCtx->rax, uIncrement);
975 pu32 += uIncrement;
976 GCPhysFault += uIncrement;
977 pCtx->rdi += uIncrement;
978 pCtx->rcx--;
979 }
980 pCtx->rip += pDis->cbInstr;
981
982 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
983 return VINF_SUCCESS;
984}
985
986
987/**
988 * Handles the simple write accesses.
989 *
990 * @returns VBox status code suitable for scheduling.
991 * @param pVM The cross context VM structure.
992 * @param pVCpu The cross context virtual CPU structure.
993 * @param pPool The pool.
994 * @param pPage The pool page (head).
995 * @param pDis The disassembly of the write instruction.
996 * @param pCtx Pointer to the register context for the CPU.
997 * @param GCPhysFault The fault address as guest physical address.
998 * @param pfReused Reused state (in/out)
999 */
1000DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
1001 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, bool *pfReused)
1002{
1003 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1004 NOREF(pVM);
1005 NOREF(pfReused); /* initialized by caller */
1006
1007 /*
1008 * Increment the modification counter and insert it into the list
1009 * of modified pages the first time.
1010 */
1011 if (!pPage->cModifications++)
1012 pgmPoolMonitorModifiedInsert(pPool, pPage);
1013
1014 /*
1015 * Clear all the pages.
1016 */
1017 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1018 if (cbWrite <= 8)
1019 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1020 else if (cbWrite <= 16)
1021 {
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1023 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1024 }
1025 else
1026 {
1027 Assert(cbWrite <= 32);
1028 for (uint32_t off = 0; off < cbWrite; off += 8)
1029 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1030 }
1031
1032 /*
1033 * Interpret the instruction.
1034 */
1035 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
1036 if (RT_SUCCESS(rc))
1037 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1038 else if (rc == VERR_EM_INTERPRETER)
1039 {
1040 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1041 pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode));
1042 rc = VINF_EM_RAW_EMULATE_INSTR;
1043 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1044 }
1045
1046# if 0 /* experimental code */
1047 if (rc == VINF_SUCCESS)
1048 {
1049 switch (pPage->enmKind)
1050 {
1051 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1052 {
1053 X86PTEPAE GstPte;
1054 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1055 AssertRC(rc);
1056
1057 /* Check the new value written by the guest. If present and with a bogus physical address, then
1058 * it's fairly safe to assume the guest is reusing the PT.
1059 */
1060 if (GstPte.n.u1Present)
1061 {
1062 RTHCPHYS HCPhys = -1;
1063 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1064 if (rc != VINF_SUCCESS)
1065 {
1066 *pfReused = true;
1067 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1068 }
1069 }
1070 break;
1071 }
1072 }
1073 }
1074# endif
1075
1076 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1077 return VBOXSTRICTRC_VAL(rc);
1078}
1079
1080
/**
 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
 *      \#PF access handler callback for page table pages.}
 *
 * Decides, under the PGM lock, how to deal with a guest write to a page that
 * is shadowed by a pool page.  The visible outcomes are:
 *      - Nothing to do (the pool page changed or was marked dirty while this
 *        CPU waited for the lock): return VINF_SUCCESS.
 *      - Nested pages (VBOX_WITH_NESTED_HWVIRT_VMX_EPT): flush the chain and
 *        return VINF_SUCCESS.
 *      - Cheap case: the modification count is below the limit (or the page
 *        is locked) and the write looks neither like page reuse nor like
 *        forking; the write is handled by pgmRZPoolAccessPfHandlerSimple or,
 *        for a recognized REP STOS, by pgmRZPoolAccessPfHandlerSTOSD.
 *      - Dirty page optimization (PGMPOOL_WITH_OPTIMIZED_DIRTY_PT, ring-0):
 *        the page is added to the dirty list and made writable for a while.
 *      - Everything else: flush the page via pgmRZPoolAccessPfHandlerFlush.
 *
 * Every return path after PGM_LOCK_VOID releases the lock again.
 *
 * @returns Strict VBox status code.  VERR_PGM_POOL_IPE if @a uUser is not a
 *          valid pool page index; the status of EMInterpretDisasCurrent if
 *          disassembling the faulting instruction fails.
 *
 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
 */
DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTX pCtx,
                                                    RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
{
    /* NOTE(review): the profile is started on StatMonitorRZ but stopped on StatMonitorPfRZ below - confirm this is intended. */
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
    PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
    /* uUser indexes pPool->aPages, so it has to be validated before use. */
    AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
    PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
    unsigned cMaxModifications;
    bool fForcedFlush = false;
    RT_NOREF_PV(uErrorCode);

# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
    AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
              ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
# endif
    LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    PGM_LOCK_VOID(pVM);
    if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
    {
        /* Pool page changed while we were waiting for the lock; ignore. */
        Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
        STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
        PGM_UNLOCK(pVM);
        return VINF_SUCCESS;
    }
# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
    if (pPage->fDirty)
    {
# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
        Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
# endif
        Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
        PGM_UNLOCK(pVM);
        return VINF_SUCCESS;    /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
    }
# endif

# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
    if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
    {
        void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
        void *pvGst;
        int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
        pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
        PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
        PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
    }
# endif

# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
    /* Nested pages are not analysed any further: the chain is flushed right away. */
    if (PGMPOOL_PAGE_IS_NESTED(pPage))
    {
        Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
        Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
        pgmPoolMonitorChainFlush(pPool, pPage);
        PGM_UNLOCK(pVM);
        return VINF_SUCCESS;
    }
# endif

    /*
     * Disassemble the faulting instruction.
     */
    PDISSTATE pDis = &pVCpu->pgm.s.Dis;
    int rc = EMInterpretDisasCurrent(pVCpu, pDis, NULL);
    if (RT_UNLIKELY(rc != VINF_SUCCESS))
    {
        /* Only the two "not present" statuses are expected here; whatever it is gets returned to the caller. */
        AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
        PGM_UNLOCK(pVM);
        return rc;
    }

    Assert(pPage->enmKind != PGMPOOLKIND_FREE);

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

# ifdef IN_RING0
    /* Maximum nr of modifications depends on the page type. */
    if (    pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
        ||  pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
        cMaxModifications = 4;
    else
        cMaxModifications = 24;
# else
    /* Outside ring-0 a single limit applies to all page kinds. */
    cMaxModifications = 48;
# endif

    /*
     * Incremental page table updates should weigh more than random ones.
     * (Only applies when started from offset 0)
     *
     * The heuristic fires when the faulting RIP is within 0x40 bytes of the
     * remembered one, the fault address is exactly one operand size past the
     * remembered fault address, and this is the handler invocation directly
     * following the one recorded for the page.  The modification count is
     * then doubled, and reaching the limit this way forces a flush.
     */
    pVCpu->pgm.s.cPoolAccessHandler++;
    if (    pPage->GCPtrLastAccessHandlerRip >= pCtx->rip - 0x40      /* observed loops in Windows 7 x64 */
        &&  pPage->GCPtrLastAccessHandlerRip <  pCtx->rip + 0x40
        &&  pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.arch.x86.cb)
        &&  pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
    {
        Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
        Assert(pPage->cModifications < 32000);
        pPage->cModifications = pPage->cModifications * 2;
        pPage->GCPtrLastAccessHandlerFault = pvFault;
        pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
        if (pPage->cModifications >= cMaxModifications)
        {
            STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
            fForcedFlush = true;
        }
    }

    if (pPage->cModifications >= cMaxModifications)
        Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));

    /*
     * Check if it's worth dealing with.
     *
     * Note that fReused is assigned inside the condition; when the first
     * clause is false (limit reached and page not locked) the reuse check
     * is not evaluated at all and fReused stays false.
     */
    bool fReused = false;
    bool fNotReusedNotForking = false;
    if (    (   pPage->cModifications < cMaxModifications   /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pgmPoolIsPageLocked(pPage)
            )
        &&  !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage))
        &&  !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(pDis->arch.x86.fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
        {
            rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault, &fReused);
            if (fReused)
                goto flushPage;

            /* A mov instruction to change the first page table entry will be remembered so we can detect
             * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
             */
            if (   rc == VINF_SUCCESS
                && !pPage->cLocked                      /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
                && pDis->pCurInstr->uOpcode == OP_MOV
                && (pvFault & PAGE_OFFSET_MASK) == 0)
            {
                pPage->GCPtrLastAccessHandlerFault = pvFault;
                pPage->cLastAccessHandler          = pVCpu->pgm.s.cPoolAccessHandler;
                pPage->GCPtrLastAccessHandlerRip   = pCtx->rip;
                /* Make sure we don't kick out a page too quickly. */
                if (pPage->cModifications > 8)
                    pPage->cModifications = 2;
            }
            else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
            {
                /* ignore the 2nd write to this page table entry. */
                pPage->cLastAccessHandler          = pVCpu->pgm.s.cPoolAccessHandler;
            }
            else
            {
                /* Unrelated write: forget the remembered fault address and RIP. */
                pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
                pPage->GCPtrLastAccessHandlerRip   = 0;
            }

            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
            PGM_UNLOCK(pVM);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         *
         * Only forward stores (DF clear) with operand and address size equal
         * to the CPU mode are considered.
         */
        if (    pDis->pCurInstr->uOpcode == OP_STOSWD
            &&  !pCtx->eflags.Bits.u1DF
            &&  pDis->arch.x86.uOpMode == pDis->uCpuMode
            &&  pDis->arch.x86.uAddrMode == pDis->uCpuMode)
        {
            bool fValidStosd = false;

            /* 32-bit: plain REP prefix, at most 0x20 dwords, staying within the page, dword aligned. */
            if (    pDis->uCpuMode == DISCPUMODE_32BIT
                &&  pDis->arch.x86.fPrefix == DISPREFIX_REP
                &&  pCtx->ecx <= 0x20
                &&  pCtx->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
                &&  !((uintptr_t)pvFault & 3)
                &&  (pCtx->eax == 0 || pCtx->eax == 0x80) /* the two values observed. */
                )
            {
                fValidStosd = true;
                pCtx->rcx &= 0xffffffff;    /* paranoia */
            }
            else
            /* 64-bit: REP + REX prefixes, at most 0x20 qwords, staying within the page, qword aligned. */
            if (    pDis->uCpuMode == DISCPUMODE_64BIT
                &&  pDis->arch.x86.fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
                &&  pCtx->rcx <= 0x20
                &&  pCtx->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
                &&  !((uintptr_t)pvFault & 7)
                &&  (pCtx->rax == 0 || pCtx->rax == 0x80) /* the two values observed. */
                )
            {
                fValidStosd = true;
            }

            if (fValidStosd)
            {
                rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pCtx, GCPhysFault, pvFault);
                STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
                PGM_UNLOCK(pVM);
                return rc;
            }
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
        Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
              pCtx->eax, pCtx->ecx, pCtx->edi, pCtx->esi, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode, pDis->arch.x86.fPrefix));
        /* Remember that the reuse and forking checks already came out negative, so they are not repeated below. */
        fNotReusedNotForking = true;
    }

# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
    /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
     * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
     */
    if (    pPage->cModifications >= cMaxModifications
        &&  !fForcedFlush
        &&  (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
        &&  (   fNotReusedNotForking
             || (   !pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage)
                 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
            )
       )
    {
        Assert(!pgmPoolIsPageLocked(pPage));
        Assert(pPage->fDirty == false);

        /* Flush any monitored duplicates as we will disable write protection. */
        if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
            ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            PPGMPOOLPAGE pPageHead = pPage;

            /* Find the monitor head. */
            while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
                pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];

            while (pPageHead)
            {
                /* Fetch the successor before flushing the current entry. */
                unsigned idxNext = pPageHead->iMonitoredNext;

                if (pPageHead != pPage)
                {
                    STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
                    Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
                    int rc2 = pgmPoolFlushPage(pPool, pPageHead);
                    AssertRC(rc2);
                }

                if (idxNext == NIL_PGMPOOL_IDX)
                    break;

                pPageHead = &pPool->aPages[idxNext];
            }
        }

        /* The flushing above might fail for locked pages, so double check. */
        if (    pPage->iMonitoredNext == NIL_PGMPOOL_IDX
            &&  pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            pgmPoolAddDirtyPage(pVM, pPool, pPage);

            /* Temporarily allow write access to the page table again. */
            rc = PGMHandlerPhysicalPageTempOff(pVM,
                                               pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
                                               pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
            if (rc == VINF_SUCCESS)
            {
                rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
                AssertMsg(rc == VINF_SUCCESS
                          /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
                          ||  rc == VERR_PAGE_TABLE_NOT_PRESENT
                          ||  rc == VERR_PAGE_NOT_PRESENT,
                          ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
# ifdef VBOX_STRICT
                pPage->GCPtrDirtyFault = pvFault;
# endif

                STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
                PGM_UNLOCK(pVM);
                return rc;
            }
            /* Any other status from PGMHandlerPhysicalPageTempOff falls through to the flush below. */
        }
    }
# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */

    /* Only counted when arriving here by falling through; the goto from the simple path skips this counter. */
    STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
flushPage:
    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret then. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault);
    if (    rc == VINF_EM_RAW_EMULATE_INSTR
        &&  fReused)
    {
        Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
        /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
        if (PGMShwGetPage(pVCpu, pCtx->rip, NULL, NULL) == VINF_SUCCESS)
            rc = VINF_SUCCESS;  /* safe to restart the instruction. */
    }
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
    PGM_UNLOCK(pVM);
    return rc;
}
1403
1404#endif /* !IN_RING3 */
1405
1406/**
1407 * @callback_method_impl{FNPGMPHYSHANDLER,
1408 * Access handler for shadowed page table pages.}
1409 *
1410 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1411 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1412 */
1413DECLCALLBACK(VBOXSTRICTRC)
1414pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1415 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1416{
1417 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1418 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1419 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1420 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1421 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1422 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1423
1424 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1425
1426 PGM_LOCK_VOID(pVM);
1427
1428#ifdef VBOX_WITH_STATISTICS
1429 /*
1430 * Collect stats on the access.
1431 */
1432 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1433 if (cbBuf <= 16 && cbBuf > 0)
1434 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1435 else if (cbBuf >= 17 && cbBuf < 32)
1436 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1437 else if (cbBuf >= 32 && cbBuf < 64)
1438 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1439 else if (cbBuf >= 64)
1440 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1441
1442 uint8_t cbAlign;
1443 switch (pPage->enmKind)
1444 {
1445 default:
1446 cbAlign = 7;
1447 break;
1448 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1449 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1450 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1451 case PGMPOOLKIND_32BIT_PD:
1452 case PGMPOOLKIND_32BIT_PD_PHYS:
1453 cbAlign = 3;
1454 break;
1455 }
1456 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1457 if ((uint8_t)GCPhys & cbAlign)
1458 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1459#endif
1460
1461 /*
1462 * Make sure the pool page wasn't modified by a different CPU.
1463 */
1464 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1465 {
1466 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1467
1468 /* The max modification count before flushing depends on the context and page type. */
1469#ifdef IN_RING3
1470 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1471#else
1472 uint16_t cMaxModifications;
1473 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1474 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1475 cMaxModifications = 4;
1476 else
1477 cMaxModifications = 24;
1478#endif
1479
1480 /*
1481 * We don't have to be very sophisticated about this since there are relativly few calls here.
1482 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1483 */
1484 if ( ( pPage->cModifications < cMaxModifications
1485 || pgmPoolIsPageLocked(pPage) )
1486 && enmOrigin != PGMACCESSORIGIN_DEVICE
1487 && cbBuf <= 16)
1488 {
1489 /* Clear the shadow entry. */
1490 if (!pPage->cModifications++)
1491 pgmPoolMonitorModifiedInsert(pPool, pPage);
1492
1493 if (cbBuf <= 8)
1494 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1495 else
1496 {
1497 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1498 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1499 }
1500 }
1501 else
1502 pgmPoolMonitorChainFlush(pPool, pPage);
1503
1504 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1505 }
1506 else
1507 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1508 PGM_UNLOCK(pVM);
1509 return VINF_PGM_HANDLER_DO_DEFAULT;
1510}
1511
1512
1513#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1514
1515# if defined(VBOX_STRICT) && !defined(IN_RING3)
1516
1517/**
1518 * Check references to guest physical memory in a PAE / PAE page table.
1519 *
1520 * @param pPool The pool.
1521 * @param pPage The page.
1522 * @param pShwPT The shadow page table (mapping of the page).
1523 * @param pGstPT The guest page table.
1524 */
1525static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1526{
1527 unsigned cErrors = 0;
1528 int LastRc = -1; /* initialized to shut up gcc */
1529 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1530 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1531 PVMCC pVM = pPool->CTX_SUFF(pVM);
1532
1533# ifdef VBOX_STRICT
1534 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1535 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1536# endif
1537 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1538 {
1539 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1540 {
1541 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1542 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1543 if ( rc != VINF_SUCCESS
1544 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1545 {
1546 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1547 LastPTE = i;
1548 LastRc = rc;
1549 LastHCPhys = HCPhys;
1550 cErrors++;
1551
1552 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1553 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1554 AssertRC(rc);
1555
1556 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1557 {
1558 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1559
1560 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1561 {
1562 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1563
1564 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1565 {
1566 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1567 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1568 {
1569 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1570 }
1571 }
1572
1573 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1574 }
1575 }
1576 }
1577 }
1578 }
1579 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1580}
1581
1582
1583/**
1584 * Check references to guest physical memory in a PAE / 32-bit page table.
1585 *
1586 * @param pPool The pool.
1587 * @param pPage The page.
1588 * @param pShwPT The shadow page table (mapping of the page).
1589 * @param pGstPT The guest page table.
1590 */
1591static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1592{
1593 unsigned cErrors = 0;
1594 int LastRc = -1; /* initialized to shut up gcc */
1595 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1596 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1597 PVMCC pVM = pPool->CTX_SUFF(pVM);
1598
1599# ifdef VBOX_STRICT
1600 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1601 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1602# endif
1603 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1604 {
1605 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1606 {
1607 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1608 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1609 if ( rc != VINF_SUCCESS
1610 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1611 {
1612 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1613 LastPTE = i;
1614 LastRc = rc;
1615 LastHCPhys = HCPhys;
1616 cErrors++;
1617
1618 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1619 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1620 AssertRC(rc);
1621
1622 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1623 {
1624 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1625
1626 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1627 {
1628 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1629
1630 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1631 {
1632 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1633 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1634 {
1635 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1636 }
1637 }
1638
1639 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1640 }
1641 }
1642 }
1643 }
1644 }
1645 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1646}
1647
1648# endif /* VBOX_STRICT && !IN_RING3 */
1649
1650/**
1651 * Clear references to guest physical memory in a PAE / PAE page table.
1652 *
1653 * @returns nr of changed PTEs
1654 * @param pPool The pool.
1655 * @param pPage The page.
1656 * @param pShwPT The shadow page table (mapping of the page).
1657 * @param pGstPT The guest page table.
1658 * @param pOldGstPT The old cached guest page table.
1659 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1660 * @param pfFlush Flush reused page table (out)
1661 */
1662DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1663 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1664{
1665 unsigned cChanged = 0;
1666
1667# ifdef VBOX_STRICT
1668 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1669 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1670# endif
1671 *pfFlush = false;
1672
1673 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1674 {
1675 /* Check the new value written by the guest. If present and with a bogus physical address, then
1676 * it's fairly safe to assume the guest is reusing the PT.
1677 */
1678 if ( fAllowRemoval
1679 && (pGstPT->a[i].u & X86_PTE_P))
1680 {
1681 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1682 {
1683 *pfFlush = true;
1684 return ++cChanged;
1685 }
1686 }
1687 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1688 {
1689 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1690 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1691 {
1692# ifdef VBOX_STRICT
1693 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1694 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1695 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1696# endif
1697 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1698 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1699 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1700 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1701
1702 if ( uHostAttr == uGuestAttr
1703 && fHostRW <= fGuestRW)
1704 continue;
1705 }
1706 cChanged++;
1707 /* Something was changed, so flush it. */
1708 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1709 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1710 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1711 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1712 }
1713 }
1714 return cChanged;
1715}
1716
1717
1718/**
1719 * Clear references to guest physical memory in a PAE / 32-bit page table.
1720 *
1721 * @returns nr of changed PTEs
1722 * @param pPool The pool.
1723 * @param pPage The page.
1724 * @param pShwPT The shadow page table (mapping of the page).
1725 * @param pGstPT The guest page table.
1726 * @param pOldGstPT The old cached guest page table.
1727 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1728 * @param pfFlush Flush reused page table (out)
1729 */
1730DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1731 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1732{
1733 unsigned cChanged = 0;
1734
1735# ifdef VBOX_STRICT
1736 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1737 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1738# endif
1739 *pfFlush = false;
1740
1741 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1742 {
1743 /* Check the new value written by the guest. If present and with a bogus physical address, then
1744 * it's fairly safe to assume the guest is reusing the PT. */
1745 if (fAllowRemoval)
1746 {
1747 X86PGUINT const uPte = pGstPT->a[i].u;
1748 if ( (uPte & X86_PTE_P)
1749 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1750 {
1751 *pfFlush = true;
1752 return ++cChanged;
1753 }
1754 }
1755 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1756 {
1757 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1758 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1759 {
1760# ifdef VBOX_STRICT
1761 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1762 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1763 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1764# endif
1765 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1766 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1767 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1768 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1769
1770 if ( uHostAttr == uGuestAttr
1771 && fHostRW <= fGuestRW)
1772 continue;
1773 }
1774 cChanged++;
1775 /* Something was changed, so flush it. */
1776 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1777 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1778 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1779 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1780 }
1781 }
1782 return cChanged;
1783}
1784
1785
1786/**
1787 * Flush a dirty page
1788 *
1789 * @param pVM The cross context VM structure.
1790 * @param pPool The pool.
1791 * @param idxSlot Dirty array slot index
1792 * @param fAllowRemoval Allow a reused page table to be removed
1793 */
1794static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1795{
1796 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1797
1798 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1799 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1800 if (idxPage == NIL_PGMPOOL_IDX)
1801 return;
1802
1803 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1804 Assert(pPage->idx == idxPage);
1805 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1806
1807 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1808 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1809
1810 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1811 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1812 Assert(rc == VINF_SUCCESS);
1813 pPage->fDirty = false;
1814
1815# ifdef VBOX_STRICT
1816 uint64_t fFlags = 0;
1817 RTHCPHYS HCPhys;
1818 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1819 AssertMsg( ( rc == VINF_SUCCESS
1820 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1821 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1822 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1823 || rc == VERR_PAGE_NOT_PRESENT,
1824 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1825# endif
1826
1827 /* Flush those PTEs that have changed. */
1828 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1829 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1830 void *pvGst;
1831 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1832 bool fFlush;
1833 unsigned cChanges;
1834
1835 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1836 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1837 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1838 else
1839 {
1840 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1841 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1842 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1843 }
1844
1845 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1846 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1847 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1848 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1849
1850 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1851 Assert(pPage->cModifications);
1852 if (cChanges < 4)
1853 pPage->cModifications = 1; /* must use > 0 here */
1854 else
1855 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1856
1857 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1858 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1859 pPool->idxFreeDirtyPage = idxSlot;
1860
1861 pPool->cDirtyPages--;
1862 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1863 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1864 if (fFlush)
1865 {
1866 Assert(fAllowRemoval);
1867 Log(("Flush reused page table!\n"));
1868 pgmPoolFlushPage(pPool, pPage);
1869 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1870 }
1871 else
1872 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1873}
1874
1875
1876# ifndef IN_RING3
1877/**
1878 * Add a new dirty page
1879 *
1880 * @param pVM The cross context VM structure.
1881 * @param pPool The pool.
1882 * @param pPage The page.
1883 */
1884void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1885{
1886 PGM_LOCK_ASSERT_OWNER(pVM);
1887 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1888 Assert(!pPage->fDirty);
1889 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1890
1891 unsigned idxFree = pPool->idxFreeDirtyPage;
1892 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1893 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1894
1895 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1896 {
1897 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1898 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1899 }
1900 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1901 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1902
1903 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1904
1905 /*
1906 * Make a copy of the guest page table as we require valid GCPhys addresses
1907 * when removing references to physical pages.
1908 * (The HCPhys linear lookup is *extremely* expensive!)
1909 */
1910 void *pvGst;
1911 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1912 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1913 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1914# ifdef VBOX_STRICT
1915 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1916 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1917 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1918 else
1919 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1920 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1921# endif
1922 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1923
1924 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1925 pPage->fDirty = true;
1926 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1927 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1928 pPool->cDirtyPages++;
1929
1930 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1931 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1932 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1933 {
1934 unsigned i;
1935 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1936 {
1937 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1938 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1939 {
1940 pPool->idxFreeDirtyPage = idxFree;
1941 break;
1942 }
1943 }
1944 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1945 }
1946
1947 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1948
1949 /*
1950 * Clear all references to this shadow table. See @bugref{7298}.
1951 */
1952 pgmPoolTrackClearPageUsers(pPool, pPage);
1953}
1954# endif /* !IN_RING3 */
1955
1956
1957/**
1958 * Check if the specified page is dirty (not write monitored)
1959 *
1960 * @return dirty or not
1961 * @param pVM The cross context VM structure.
1962 * @param GCPhys Guest physical address
1963 */
1964bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1965{
1966 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1967 PGM_LOCK_ASSERT_OWNER(pVM);
1968 if (!pPool->cDirtyPages)
1969 return false;
1970
1971 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1972
1973 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1974 {
1975 unsigned idxPage = pPool->aidxDirtyPages[i];
1976 if (idxPage != NIL_PGMPOOL_IDX)
1977 {
1978 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1979 if (pPage->GCPhys == GCPhys)
1980 return true;
1981 }
1982 }
1983 return false;
1984}
1985
1986
1987/**
1988 * Reset all dirty pages by reinstating page monitoring.
1989 *
1990 * @param pVM The cross context VM structure.
1991 */
1992void pgmPoolResetDirtyPages(PVMCC pVM)
1993{
1994 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1995 PGM_LOCK_ASSERT_OWNER(pVM);
1996 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1997
1998 if (!pPool->cDirtyPages)
1999 return;
2000
2001 Log(("pgmPoolResetDirtyPages\n"));
2002 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2003 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2004
2005 pPool->idxFreeDirtyPage = 0;
2006 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2007 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2008 {
2009 unsigned i;
2010 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2011 {
2012 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2013 {
2014 pPool->idxFreeDirtyPage = i;
2015 break;
2016 }
2017 }
2018 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2019 }
2020
2021 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2022 return;
2023}
2024
2025
2026/**
2027 * Invalidate the PT entry for the specified page
2028 *
2029 * @param pVM The cross context VM structure.
2030 * @param GCPtrPage Guest page to invalidate
2031 */
2032void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2033{
2034 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2035 PGM_LOCK_ASSERT_OWNER(pVM);
2036 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2037
2038 if (!pPool->cDirtyPages)
2039 return;
2040
2041 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2042 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2043 {
2044 /** @todo What was intended here??? This looks incomplete... */
2045 }
2046}
2047
2048
2049/**
2050 * Reset all dirty pages by reinstating page monitoring.
2051 *
2052 * @param pVM The cross context VM structure.
2053 * @param GCPhysPT Physical address of the page table
2054 */
2055void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2056{
2057 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2058 PGM_LOCK_ASSERT_OWNER(pVM);
2059 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2060 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2061
2062 if (!pPool->cDirtyPages)
2063 return;
2064
2065 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2066
2067 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2068 {
2069 unsigned idxPage = pPool->aidxDirtyPages[i];
2070 if (idxPage != NIL_PGMPOOL_IDX)
2071 {
2072 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2073 if (pPage->GCPhys == GCPhysPT)
2074 {
2075 idxDirtyPage = i;
2076 break;
2077 }
2078 }
2079 }
2080
2081 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2082 {
2083 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2084 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2085 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2086 {
2087 unsigned i;
2088 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2089 {
2090 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2091 {
2092 pPool->idxFreeDirtyPage = i;
2093 break;
2094 }
2095 }
2096 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2097 }
2098 }
2099}
2100
2101#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2102
2103/**
2104 * Inserts a page into the GCPhys hash table.
2105 *
2106 * @param pPool The pool.
2107 * @param pPage The page.
2108 */
2109DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2110{
2111 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2112 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2113 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2114 pPage->iNext = pPool->aiHash[iHash];
2115 pPool->aiHash[iHash] = pPage->idx;
2116}
2117
2118
2119/**
2120 * Removes a page from the GCPhys hash table.
2121 *
2122 * @param pPool The pool.
2123 * @param pPage The page.
2124 */
2125DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2126{
2127 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2128 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2129 if (pPool->aiHash[iHash] == pPage->idx)
2130 pPool->aiHash[iHash] = pPage->iNext;
2131 else
2132 {
2133 uint16_t iPrev = pPool->aiHash[iHash];
2134 for (;;)
2135 {
2136 const int16_t i = pPool->aPages[iPrev].iNext;
2137 if (i == pPage->idx)
2138 {
2139 pPool->aPages[iPrev].iNext = pPage->iNext;
2140 break;
2141 }
2142 if (i == NIL_PGMPOOL_IDX)
2143 {
2144 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2145 break;
2146 }
2147 iPrev = i;
2148 }
2149 }
2150 pPage->iNext = NIL_PGMPOOL_IDX;
2151}
2152
2153
2154/**
2155 * Frees up one cache page.
2156 *
2157 * @returns VBox status code.
2158 * @retval VINF_SUCCESS on success.
2159 * @param pPool The pool.
2160 * @param iUser The user index.
2161 */
2162static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2163{
2164 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2165 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2166 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2167
2168 /*
2169 * Select one page from the tail of the age list.
2170 */
2171 PPGMPOOLPAGE pPage;
2172 for (unsigned iLoop = 0; ; iLoop++)
2173 {
2174 uint16_t iToFree = pPool->iAgeTail;
2175 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2176 iToFree = pPool->aPages[iToFree].iAgePrev;
2177/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2178 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2179 {
2180 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2181 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2182 {
2183 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2184 continue;
2185 iToFree = i;
2186 break;
2187 }
2188 }
2189*/
2190 Assert(iToFree != iUser);
2191 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2192 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2193 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2194 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2195 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2196 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2197
2198 pPage = &pPool->aPages[iToFree];
2199
2200 /*
2201 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2202 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2203 */
2204 if ( !pgmPoolIsPageLocked(pPage)
2205 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2206 break;
2207 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2208 pgmPoolCacheUsed(pPool, pPage);
2209 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2210 }
2211
2212 /*
2213 * Found a usable page, flush it and return.
2214 */
2215 int rc = pgmPoolFlushPage(pPool, pPage);
2216 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2217 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2218 if (rc == VINF_SUCCESS)
2219 PGM_INVL_ALL_VCPU_TLBS(pVM);
2220 return rc;
2221}
2222
2223
2224/**
2225 * Checks if a kind mismatch is really a page being reused
2226 * or if it's just normal remappings.
2227 *
2228 * @returns true if reused and the cached page (enmKind1) should be flushed
2229 * @returns false if not reused.
2230 * @param enmKind1 The kind of the cached page.
2231 * @param enmKind2 The kind of the requested page.
2232 */
2233static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2234{
2235 switch (enmKind1)
2236 {
2237 /*
2238 * Never reuse them. There is no remapping in non-paging mode.
2239 */
2240 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2241 case PGMPOOLKIND_32BIT_PD_PHYS:
2242 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2243 case PGMPOOLKIND_PAE_PD_PHYS:
2244 case PGMPOOLKIND_PAE_PDPT_PHYS:
2245 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2246 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2247 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2248 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2249 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2250 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2251 return false;
2252
2253 /*
2254 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2255 */
2256 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2257 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2259 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2260 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2261 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2262 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2264 case PGMPOOLKIND_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PDPT:
2266 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2267 switch (enmKind2)
2268 {
2269 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2270 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2271 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2272 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2273 case PGMPOOLKIND_64BIT_PML4:
2274 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2275 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2276 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2277 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2278 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2279 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2280 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2281 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2282 return true;
2283 default:
2284 return false;
2285 }
2286
2287 /*
2288 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2289 */
2290 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2291 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2292 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2293 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2294 case PGMPOOLKIND_64BIT_PML4:
2295 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2296 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2297 switch (enmKind2)
2298 {
2299 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2300 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2301 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2302 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2303 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2304 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2305 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2306 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2307 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2308 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2309 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2310 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2311 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2312 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2313 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2314 return true;
2315 default:
2316 return false;
2317 }
2318
2319#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2320 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2321 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2322 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2323 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2324 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2325
2326 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2327 return false;
2328#endif
2329
2330 /*
2331 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2332 */
2333 case PGMPOOLKIND_ROOT_NESTED:
2334 return false;
2335
2336 default:
2337 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2338 }
2339}
2340
2341
2342/**
2343 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2344 *
2345 * @returns VBox status code.
2346 * @retval VINF_PGM_CACHED_PAGE on success.
2347 * @retval VERR_FILE_NOT_FOUND if not found.
2348 * @param pPool The pool.
2349 * @param GCPhys The GC physical address of the page we're gonna shadow.
2350 * @param enmKind The kind of mapping.
2351 * @param enmAccess Access type for the mapping (only relevant for big pages)
2352 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2353 * @param iUser The shadow page pool index of the user table. This is
2354 * NIL_PGMPOOL_IDX for root pages.
2355 * @param iUserTable The index into the user table (shadowed). Ignored if
2356 * root page
2357 * @param ppPage Where to store the pointer to the page.
2358 */
2359static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2360 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2361{
2362 /*
2363 * Look up the GCPhys in the hash.
2364 */
2365 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2366 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2367 if (i != NIL_PGMPOOL_IDX)
2368 {
2369 do
2370 {
2371 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2372 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2373 if (pPage->GCPhys == GCPhys)
2374 {
2375 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2376 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2377 && pPage->fA20Enabled == fA20Enabled)
2378 {
2379 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2380 * doesn't flush it in case there are no more free use records.
2381 */
2382 pgmPoolCacheUsed(pPool, pPage);
2383
2384 int rc = VINF_SUCCESS;
2385 if (iUser != NIL_PGMPOOL_IDX)
2386 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2387 if (RT_SUCCESS(rc))
2388 {
2389 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2390 *ppPage = pPage;
2391 if (pPage->cModifications)
2392 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2393 STAM_COUNTER_INC(&pPool->StatCacheHits);
2394 return VINF_PGM_CACHED_PAGE;
2395 }
2396 return rc;
2397 }
2398
2399 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2400 {
2401 /*
2402 * The kind is different. In some cases we should now flush the page
2403 * as it has been reused, but in most cases this is normal remapping
2404 * of PDs as PT or big pages using the GCPhys field in a slightly
2405 * different way than the other kinds.
2406 */
2407 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2408 {
2409 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2410 pgmPoolFlushPage(pPool, pPage);
2411 break;
2412 }
2413 }
2414 }
2415
2416 /* next */
2417 i = pPage->iNext;
2418 } while (i != NIL_PGMPOOL_IDX);
2419 }
2420
2421 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2422 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2423 return VERR_FILE_NOT_FOUND;
2424}
2425
2426
2427/**
2428 * Inserts a page into the cache.
2429 *
2430 * @param pPool The pool.
2431 * @param pPage The cached page.
2432 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2433 */
2434static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2435{
2436 /*
2437 * Insert into the GCPhys hash if the page is fit for that.
2438 */
2439 Assert(!pPage->fCached);
2440 if (fCanBeCached)
2441 {
2442 pPage->fCached = true;
2443 pgmPoolHashInsert(pPool, pPage);
2444 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2445 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2446 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2447 }
2448 else
2449 {
2450 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2451 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2452 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2453 }
2454
2455 /*
2456 * Insert at the head of the age list.
2457 */
2458 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2459 pPage->iAgeNext = pPool->iAgeHead;
2460 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2461 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2462 else
2463 pPool->iAgeTail = pPage->idx;
2464 pPool->iAgeHead = pPage->idx;
2465}
2466
2467
2468/**
2469 * Flushes a cached page.
2470 *
2471 * @param pPool The pool.
2472 * @param pPage The cached page.
2473 */
2474static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2475{
2476 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2477
2478 /*
2479 * Remove the page from the hash.
2480 */
2481 if (pPage->fCached)
2482 {
2483 pPage->fCached = false;
2484 pgmPoolHashRemove(pPool, pPage);
2485 }
2486 else
2487 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2488
2489 /*
2490 * Remove it from the age list.
2491 */
2492 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2493 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2494 else
2495 pPool->iAgeTail = pPage->iAgePrev;
2496 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2497 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2498 else
2499 pPool->iAgeHead = pPage->iAgeNext;
2500 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2501 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2502}
2503
2504
2505/**
2506 * Looks for pages sharing the monitor.
2507 *
2508 * @returns Pointer to the head page.
2509 * @returns NULL if not found.
2510 * @param pPool The Pool
2511 * @param pNewPage The page which is going to be monitored.
2512 */
2513static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2514{
2515 /*
2516 * Look up the GCPhys in the hash.
2517 */
2518 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2519 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2520 if (i == NIL_PGMPOOL_IDX)
2521 return NULL;
2522 do
2523 {
2524 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2525 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2526 && pPage != pNewPage)
2527 {
2528 switch (pPage->enmKind)
2529 {
2530 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2531 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2532 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2533 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2534 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2535 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2536 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2537 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2538 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2539 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2540 case PGMPOOLKIND_64BIT_PML4:
2541 case PGMPOOLKIND_32BIT_PD:
2542 case PGMPOOLKIND_PAE_PDPT:
2543#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2544 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2545 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2546 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2547#endif
2548 {
2549 /* find the head */
2550 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2551 {
2552 Assert(pPage->iMonitoredPrev != pPage->idx);
2553 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2554 }
2555 return pPage;
2556 }
2557
2558 /* ignore, no monitoring. */
2559 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2560 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2561 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2562 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2563 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2564 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2565 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2566 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2567 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2568 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2569 case PGMPOOLKIND_ROOT_NESTED:
2570 case PGMPOOLKIND_PAE_PD_PHYS:
2571 case PGMPOOLKIND_PAE_PDPT_PHYS:
2572 case PGMPOOLKIND_32BIT_PD_PHYS:
2573 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2574#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2575 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2576 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2577#endif
2578 break;
2579 default:
2580 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2581 }
2582 }
2583
2584 /* next */
2585 i = pPage->iNext;
2586 } while (i != NIL_PGMPOOL_IDX);
2587 return NULL;
2588}
2589
2590
2591/**
2592 * Enabled write monitoring of a guest page.
2593 *
2594 * @returns VBox status code.
2595 * @retval VINF_SUCCESS on success.
2596 * @param pPool The pool.
2597 * @param pPage The cached page.
2598 */
2599static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2600{
2601 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2602
2603 /*
2604 * Filter out the relevant kinds.
2605 */
2606 switch (pPage->enmKind)
2607 {
2608 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2609 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2610 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2611 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2612 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2613 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2614 case PGMPOOLKIND_64BIT_PML4:
2615 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2616 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2617 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2618 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2619 case PGMPOOLKIND_32BIT_PD:
2620 case PGMPOOLKIND_PAE_PDPT:
2621 break;
2622
2623 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2624 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2625 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2626 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2627 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2628 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2629 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2630 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2631 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2632 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2633 case PGMPOOLKIND_ROOT_NESTED:
2634 /* Nothing to monitor here. */
2635 return VINF_SUCCESS;
2636
2637 case PGMPOOLKIND_32BIT_PD_PHYS:
2638 case PGMPOOLKIND_PAE_PDPT_PHYS:
2639 case PGMPOOLKIND_PAE_PD_PHYS:
2640 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2641 /* Nothing to monitor here. */
2642 return VINF_SUCCESS;
2643
2644#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2645 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2646 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2647 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2648 break;
2649
2650 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2651 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2652 /* Nothing to monitor here. */
2653 return VINF_SUCCESS;
2654#endif
2655
2656 default:
2657 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2658 }
2659
2660 /*
2661 * Install handler.
2662 */
2663 int rc;
2664 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2665 if (pPageHead)
2666 {
2667 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2668 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2669
2670#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2671 if (pPageHead->fDirty)
2672 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2673#endif
2674
2675 pPage->iMonitoredPrev = pPageHead->idx;
2676 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2677 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2678 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2679 pPageHead->iMonitoredNext = pPage->idx;
2680 rc = VINF_SUCCESS;
2681 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2682 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2683 }
2684 else
2685 {
2686 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2687 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2688
2689 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2690 PVMCC pVM = pPool->CTX_SUFF(pVM);
2691 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2692 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2693 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2694 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2695 * the heap size should suffice. */
2696 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2697 PVMCPU pVCpu = VMMGetCpu(pVM);
2698 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2699 }
2700 pPage->fMonitored = true;
2701 return rc;
2702}
2703
2704
2705/**
2706 * Disables write monitoring of a guest page.
2707 *
2708 * @returns VBox status code.
2709 * @retval VINF_SUCCESS on success.
2710 * @param pPool The pool.
2711 * @param pPage The cached page.
2712 */
2713static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2714{
2715 /*
2716 * Filter out the relevant kinds.
2717 */
2718 switch (pPage->enmKind)
2719 {
2720 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2721 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2722 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2723 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2724 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2725 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2726 case PGMPOOLKIND_64BIT_PML4:
2727 case PGMPOOLKIND_32BIT_PD:
2728 case PGMPOOLKIND_PAE_PDPT:
2729 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2730 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2731 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2733 break;
2734
2735 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2736 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2737 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2738 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2739 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2740 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2741 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2742 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2743 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2744 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2745 case PGMPOOLKIND_ROOT_NESTED:
2746 case PGMPOOLKIND_PAE_PD_PHYS:
2747 case PGMPOOLKIND_PAE_PDPT_PHYS:
2748 case PGMPOOLKIND_32BIT_PD_PHYS:
2749 /* Nothing to monitor here. */
2750 Assert(!pPage->fMonitored);
2751 return VINF_SUCCESS;
2752
2753#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2754 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2755 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2756 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2757 break;
2758
2759 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2760 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2761 /* Nothing to monitor here. */
2762 Assert(!pPage->fMonitored);
2763 return VINF_SUCCESS;
2764#endif
2765
2766 default:
2767 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2768 }
2769 Assert(pPage->fMonitored);
2770
2771 /*
2772 * Remove the page from the monitored list or uninstall it if last.
2773 */
2774 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2775 int rc;
2776 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2777 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2778 {
2779 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2780 {
2781 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2782 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2783 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2784
2785 AssertFatalRCSuccess(rc);
2786 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2787 }
2788 else
2789 {
2790 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2791 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2792 {
2793 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2794 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2795 }
2796 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2797 rc = VINF_SUCCESS;
2798 }
2799 }
2800 else
2801 {
2802 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2803 AssertFatalRC(rc);
2804 PVMCPU pVCpu = VMMGetCpu(pVM);
2805 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2806 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2807 }
2808 pPage->fMonitored = false;
2809
2810 /*
2811 * Remove it from the list of modified pages (if in it).
2812 */
2813 pgmPoolMonitorModifiedRemove(pPool, pPage);
2814
2815 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2816 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2817
2818 return rc;
2819}
2820
2821
2822/**
2823 * Inserts the page into the list of modified pages.
2824 *
2825 * @param pPool The pool.
2826 * @param pPage The page.
2827 */
2828void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2829{
2830 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2831 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2832 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2833 && pPool->iModifiedHead != pPage->idx,
2834 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2835 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2836 pPool->iModifiedHead, pPool->cModifiedPages));
2837
2838 pPage->iModifiedNext = pPool->iModifiedHead;
2839 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2840 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2841 pPool->iModifiedHead = pPage->idx;
2842 pPool->cModifiedPages++;
2843#ifdef VBOX_WITH_STATISTICS
2844 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2845 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2846#endif
2847}
2848
2849
2850/**
2851 * Removes the page from the list of modified pages and resets the
2852 * modification counter.
2853 *
2854 * @param pPool The pool.
2855 * @param pPage The page which is believed to be in the list of modified pages.
2856 */
2857static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2858{
2859 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2860 if (pPool->iModifiedHead == pPage->idx)
2861 {
2862 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2863 pPool->iModifiedHead = pPage->iModifiedNext;
2864 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2865 {
2866 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2867 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2868 }
2869 pPool->cModifiedPages--;
2870 }
2871 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2872 {
2873 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2874 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2875 {
2876 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2877 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2878 }
2879 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2880 pPool->cModifiedPages--;
2881 }
2882 else
2883 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2884 pPage->cModifications = 0;
2885}
2886
2887
2888/**
2889 * Zaps the list of modified pages, resetting their modification counters in the process.
2890 *
2891 * @param pVM The cross context VM structure.
2892 */
2893static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2894{
2895 PGM_LOCK_VOID(pVM);
2896 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2897 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2898
2899 unsigned cPages = 0; NOREF(cPages);
2900
2901#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2902 pgmPoolResetDirtyPages(pVM);
2903#endif
2904
2905 uint16_t idx = pPool->iModifiedHead;
2906 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2907 while (idx != NIL_PGMPOOL_IDX)
2908 {
2909 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2910 idx = pPage->iModifiedNext;
2911 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2912 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2913 pPage->cModifications = 0;
2914 Assert(++cPages);
2915 }
2916 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2917 pPool->cModifiedPages = 0;
2918 PGM_UNLOCK(pVM);
2919}
2920
2921
2922/**
2923 * Handle SyncCR3 pool tasks
2924 *
2925 * @returns VBox status code.
2926 * @retval VINF_SUCCESS if successfully added.
2927 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2928 * @param pVCpu The cross context virtual CPU structure.
2929 * @remark Should only be used when monitoring is available, thus placed in
2930 * the PGMPOOL_WITH_MONITORING \#ifdef.
2931 */
2932int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2933{
2934 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2935 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2936
2937 /*
2938 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2939 * Occasionally we will have to clear all the shadow page tables because we wanted
2940 * to monitor a page which was mapped by too many shadowed page tables. This operation
2941 * sometimes referred to as a 'lightweight flush'.
2942 */
2943# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2944 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2945 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2946# else /* !IN_RING3 */
2947 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2948 {
2949 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2950 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2951
2952 /* Make sure all other VCPUs return to ring 3. */
2953 if (pVM->cCpus > 1)
2954 {
2955 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2956 PGM_INVL_ALL_VCPU_TLBS(pVM);
2957 }
2958 return VINF_PGM_SYNC_CR3;
2959 }
2960# endif /* !IN_RING3 */
2961 else
2962 {
2963 pgmPoolMonitorModifiedClearAll(pVM);
2964
2965 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2966 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2967 {
2968 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2969 return pgmPoolSyncCR3(pVCpu);
2970 }
2971 }
2972 return VINF_SUCCESS;
2973}
2974
2975
2976/**
2977 * Frees up at least one user entry.
2978 *
2979 * @returns VBox status code.
2980 * @retval VINF_SUCCESS if successfully added.
2981 *
2982 * @param pPool The pool.
2983 * @param iUser The user index.
2984 */
2985static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2986{
2987 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2988 /*
2989 * Just free cached pages in a braindead fashion.
2990 */
2991 /** @todo walk the age list backwards and free the first with usage. */
2992 int rc = VINF_SUCCESS;
2993 do
2994 {
2995 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2996 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2997 rc = rc2;
2998 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2999 return rc;
3000}
3001
3002
3003/**
3004 * Inserts a page into the cache.
3005 *
3006 * This will create user node for the page, insert it into the GCPhys
3007 * hash, and insert it into the age list.
3008 *
3009 * @returns VBox status code.
3010 * @retval VINF_SUCCESS if successfully added.
3011 *
3012 * @param pPool The pool.
3013 * @param pPage The cached page.
3014 * @param GCPhys The GC physical address of the page we're gonna shadow.
3015 * @param iUser The user index.
3016 * @param iUserTable The user table index.
3017 */
3018DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3019{
3020 int rc = VINF_SUCCESS;
3021 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3022
3023 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3024
3025 if (iUser != NIL_PGMPOOL_IDX)
3026 {
3027#ifdef VBOX_STRICT
3028 /*
3029 * Check that the entry doesn't already exists.
3030 */
3031 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3032 {
3033 uint16_t i = pPage->iUserHead;
3034 do
3035 {
3036 Assert(i < pPool->cMaxUsers);
3037 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3038 i = paUsers[i].iNext;
3039 } while (i != NIL_PGMPOOL_USER_INDEX);
3040 }
3041#endif
3042
3043 /*
3044 * Find free a user node.
3045 */
3046 uint16_t i = pPool->iUserFreeHead;
3047 if (i == NIL_PGMPOOL_USER_INDEX)
3048 {
3049 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3050 if (RT_FAILURE(rc))
3051 return rc;
3052 i = pPool->iUserFreeHead;
3053 }
3054
3055 /*
3056 * Unlink the user node from the free list,
3057 * initialize and insert it into the user list.
3058 */
3059 pPool->iUserFreeHead = paUsers[i].iNext;
3060 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3061 paUsers[i].iUser = iUser;
3062 paUsers[i].iUserTable = iUserTable;
3063 pPage->iUserHead = i;
3064 }
3065 else
3066 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3067
3068
3069 /*
3070 * Insert into cache and enable monitoring of the guest page if enabled.
3071 *
3072 * Until we implement caching of all levels, including the CR3 one, we'll
3073 * have to make sure we don't try monitor & cache any recursive reuse of
3074 * a monitored CR3 page. Because all windows versions are doing this we'll
3075 * have to be able to do combined access monitoring, CR3 + PT and
3076 * PD + PT (guest PAE).
3077 *
3078 * Update:
3079 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3080 */
3081 const bool fCanBeMonitored = true;
3082 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3083 if (fCanBeMonitored)
3084 {
3085 rc = pgmPoolMonitorInsert(pPool, pPage);
3086 AssertRC(rc);
3087 }
3088 return rc;
3089}
3090
3091
3092/**
3093 * Adds a user reference to a page.
3094 *
3095 * This will move the page to the head of the
3096 *
3097 * @returns VBox status code.
3098 * @retval VINF_SUCCESS if successfully added.
3099 *
3100 * @param pPool The pool.
3101 * @param pPage The cached page.
3102 * @param iUser The user index.
3103 * @param iUserTable The user table.
3104 */
3105static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3106{
3107 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3108 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3109 Assert(iUser != NIL_PGMPOOL_IDX);
3110
3111# ifdef VBOX_STRICT
3112 /*
3113 * Check that the entry doesn't already exists. We only allow multiple
3114 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3115 */
3116 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3117 {
3118 uint16_t i = pPage->iUserHead;
3119 do
3120 {
3121 Assert(i < pPool->cMaxUsers);
3122 /** @todo this assertion looks odd... Shouldn't it be && here? */
3123 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3124 i = paUsers[i].iNext;
3125 } while (i != NIL_PGMPOOL_USER_INDEX);
3126 }
3127# endif
3128
3129 /*
3130 * Allocate a user node.
3131 */
3132 uint16_t i = pPool->iUserFreeHead;
3133 if (i == NIL_PGMPOOL_USER_INDEX)
3134 {
3135 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3136 if (RT_FAILURE(rc))
3137 return rc;
3138 i = pPool->iUserFreeHead;
3139 }
3140 pPool->iUserFreeHead = paUsers[i].iNext;
3141
3142 /*
3143 * Initialize the user node and insert it.
3144 */
3145 paUsers[i].iNext = pPage->iUserHead;
3146 paUsers[i].iUser = iUser;
3147 paUsers[i].iUserTable = iUserTable;
3148 pPage->iUserHead = i;
3149
3150# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3151 if (pPage->fDirty)
3152 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3153# endif
3154
3155 /*
3156 * Tell the cache to update its replacement stats for this page.
3157 */
3158 pgmPoolCacheUsed(pPool, pPage);
3159 return VINF_SUCCESS;
3160}
3161
3162
3163/**
3164 * Frees a user record associated with a page.
3165 *
3166 * This does not clear the entry in the user table, it simply replaces the
3167 * user record to the chain of free records.
3168 *
3169 * @param pPool The pool.
3170 * @param pPage The shadow page.
3171 * @param iUser The shadow page pool index of the user table.
3172 * @param iUserTable The index into the user table (shadowed).
3173 *
3174 * @remarks Don't call this for root pages.
3175 */
3176static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3177{
3178 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3179 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3180 Assert(iUser != NIL_PGMPOOL_IDX);
3181
3182 /*
3183 * Unlink and free the specified user entry.
3184 */
3185
3186 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3187 uint16_t i = pPage->iUserHead;
3188 if ( i != NIL_PGMPOOL_USER_INDEX
3189 && paUsers[i].iUser == iUser
3190 && paUsers[i].iUserTable == iUserTable)
3191 {
3192 pPage->iUserHead = paUsers[i].iNext;
3193
3194 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3195 paUsers[i].iNext = pPool->iUserFreeHead;
3196 pPool->iUserFreeHead = i;
3197 return;
3198 }
3199
3200 /* General: Linear search. */
3201 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3202 while (i != NIL_PGMPOOL_USER_INDEX)
3203 {
3204 if ( paUsers[i].iUser == iUser
3205 && paUsers[i].iUserTable == iUserTable)
3206 {
3207 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3208 paUsers[iPrev].iNext = paUsers[i].iNext;
3209 else
3210 pPage->iUserHead = paUsers[i].iNext;
3211
3212 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3213 paUsers[i].iNext = pPool->iUserFreeHead;
3214 pPool->iUserFreeHead = i;
3215 return;
3216 }
3217 iPrev = i;
3218 i = paUsers[i].iNext;
3219 }
3220
3221 /* Fatal: didn't find it */
3222 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3223 iUser, iUserTable, pPage->GCPhys));
3224}
3225
3226
#if 0 /* unused */
/**
 * Gets the entry size of a shadow table.
 *
 * @returns The size of the entry in bytes. That is, 4 or 8.  An unlisted kind
 *          triggers a fatal assertion.
 *
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Legacy 32-bit shadow tables: 4 byte entries. */
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
            return 4;

        /* All remaining listed shadow table kinds: 8 byte entries. */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            return 8;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3277
#if 0 /* unused */
/**
 * Gets the entry size of a guest table.
 *
 * @returns The size of the entry in bytes. That is, 0, 4 or 8.  Zero is
 *          returned (after a non-fatal assertion) for the kinds listed in the
 *          last group below, which have no 4 or 8 byte guest entry size
 *          assigned here.  An unlisted kind triggers a fatal assertion.
 *
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Shadowing 32-bit guest tables: 4 byte guest entries. */
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            return 4;

        /* Shadowing PAE / 64-bit guest tables: 8 byte guest entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
            return 8;

        /* No guest entry size is assigned to these kinds. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3332
3333
3334/**
3335 * Checks one shadow page table entry for a mapping of a physical page.
3336 *
3337 * @returns true / false indicating removal of all relevant PTEs
3338 *
3339 * @param pVM The cross context VM structure.
3340 * @param pPhysPage The guest page in question.
3341 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3342 * @param iShw The shadow page table.
3343 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3344 */
3345static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3346{
3347 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3348 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3349 bool fRet = false;
3350
3351 /*
3352 * Assert sanity.
3353 */
3354 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3355 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3356 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3357
3358 /*
3359 * Then, clear the actual mappings to the page in the shadow PT.
3360 */
3361 switch (pPage->enmKind)
3362 {
3363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3364 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3365 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3366 {
3367 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3368 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3369 uint32_t u32AndMask = 0;
3370 uint32_t u32OrMask = 0;
3371
3372 if (!fFlushPTEs)
3373 {
3374 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3375 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3376 {
3377 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3378 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3379 u32OrMask = X86_PTE_RW;
3380 u32AndMask = UINT32_MAX;
3381 fRet = true;
3382 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3383 break;
3384
3385 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3386 u32OrMask = 0;
3387 u32AndMask = ~X86_PTE_RW;
3388 fRet = true;
3389 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3390 break;
3391 default:
3392 /* We will end up here when called with an "ALL" access handler. */
3393 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3394 break;
3395 }
3396 }
3397 else
3398 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3399
3400 /* Update the counter if we're removing references. */
3401 if (!u32AndMask)
3402 {
3403 Assert(pPage->cPresent);
3404 Assert(pPool->cPresent);
3405 pPage->cPresent--;
3406 pPool->cPresent--;
3407 }
3408
3409 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3410 {
3411 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3412 X86PTE Pte;
3413 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3414 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3415 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3416
3417 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3418 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3419 return fRet;
3420 }
3421#ifdef LOG_ENABLED
3422 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3423 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3424 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3425 {
3426 Log(("i=%d cFound=%d\n", i, ++cFound));
3427 }
3428#endif
3429 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3430 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3431 break;
3432 }
3433
3434 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3435 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3436 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3437 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3438 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3439 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3440#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3441 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3442# ifdef PGM_WITH_LARGE_PAGES
3443 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
3444# endif
3445#endif
3446 {
3447 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3448 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3449 uint64_t u64OrMask = 0;
3450 uint64_t u64AndMask = 0;
3451
3452 if (!fFlushPTEs)
3453 {
3454 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3455 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3456 {
3457 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3458 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3459 u64OrMask = X86_PTE_RW;
3460 u64AndMask = UINT64_MAX;
3461 fRet = true;
3462 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3463 break;
3464
3465 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3466 u64OrMask = 0;
3467 u64AndMask = ~(uint64_t)X86_PTE_RW;
3468 fRet = true;
3469 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3470 break;
3471
3472 default:
3473 /* We will end up here when called with an "ALL" access handler. */
3474 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3475 break;
3476 }
3477 }
3478 else
3479 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3480
3481 /* Update the counter if we're removing references. */
3482 if (!u64AndMask)
3483 {
3484 Assert(pPage->cPresent);
3485 Assert(pPool->cPresent);
3486 pPage->cPresent--;
3487 pPool->cPresent--;
3488 }
3489
3490 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3491 {
3492 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3493 X86PTEPAE Pte;
3494 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3495 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3496 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3497
3498 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3499 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3500 return fRet;
3501 }
3502#ifdef LOG_ENABLED
3503 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3504 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3505 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3506 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3507 Log(("i=%d cFound=%d\n", i, ++cFound));
3508#endif
3509 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3510 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3511 break;
3512 }
3513
3514#ifdef PGM_WITH_LARGE_PAGES
3515 /* Large page case only. */
3516 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3517 {
3518 Assert(pVM->pgm.s.fNestedPaging);
3519
3520 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3521 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3522
3523 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3524 {
3525 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3526 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3527 pPD->a[iPte].u = 0;
3528 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3529
3530 /* Update the counter as we're removing references. */
3531 Assert(pPage->cPresent);
3532 Assert(pPool->cPresent);
3533 pPage->cPresent--;
3534 pPool->cPresent--;
3535
3536 return fRet;
3537 }
3538# ifdef LOG_ENABLED
3539 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3540 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3541 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3542 Log(("i=%d cFound=%d\n", i, ++cFound));
3543# endif
3544 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3545 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3546 break;
3547 }
3548
3549 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3550 case PGMPOOLKIND_PAE_PD_PHYS:
3551 {
3552 Assert(pVM->pgm.s.fNestedPaging);
3553
3554 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3555 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3556
3557 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3558 {
3559 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3560 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3561 pPD->a[iPte].u = 0;
3562 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3563
3564 /* Update the counter as we're removing references. */
3565 Assert(pPage->cPresent);
3566 Assert(pPool->cPresent);
3567 pPage->cPresent--;
3568 pPool->cPresent--;
3569 return fRet;
3570 }
3571# ifdef LOG_ENABLED
3572 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3573 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3574 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3575 Log(("i=%d cFound=%d\n", i, ++cFound));
3576# endif
3577 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3578 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3579 break;
3580 }
3581#endif /* PGM_WITH_LARGE_PAGES */
3582
3583 default:
3584 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3585 }
3586
3587 /* not reached. */
3588#ifndef _MSC_VER
3589 return fRet;
3590#endif
3591}
3592
3593
3594/**
3595 * Scans one shadow page table for mappings of a physical page.
3596 *
3597 * @param pVM The cross context VM structure.
3598 * @param pPhysPage The guest page in question.
3599 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3600 * @param iShw The shadow page table.
3601 */
3602static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3603{
3604 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3605
3606 /* We should only come here with when there's only one reference to this physical page. */
3607 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3608
3609 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3610 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3611 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3612 if (!fKeptPTEs)
3613 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3614 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3615}
3616
3617
3618/**
3619 * Flushes a list of shadow page tables mapping the same physical page.
3620 *
3621 * @param pVM The cross context VM structure.
3622 * @param pPhysPage The guest page in question.
3623 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3624 * @param iPhysExt The physical cross reference extent list to flush.
3625 */
3626static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3627{
3628 PGM_LOCK_ASSERT_OWNER(pVM);
3629 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3630 bool fKeepList = false;
3631
3632 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3633 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3634
3635 const uint16_t iPhysExtStart = iPhysExt;
3636 PPGMPOOLPHYSEXT pPhysExt;
3637 do
3638 {
3639 Assert(iPhysExt < pPool->cMaxPhysExts);
3640 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3641 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3642 {
3643 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3644 {
3645 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3646 if (!fKeptPTEs)
3647 {
3648 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3649 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3650 }
3651 else
3652 fKeepList = true;
3653 }
3654 }
3655 /* next */
3656 iPhysExt = pPhysExt->iNext;
3657 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3658
3659 if (!fKeepList)
3660 {
3661 /* insert the list into the free list and clear the ram range entry. */
3662 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3663 pPool->iPhysExtFreeHead = iPhysExtStart;
3664 /* Invalidate the tracking data. */
3665 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3666 }
3667
3668 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3669}
3670
3671
3672/**
3673 * Flushes all shadow page table mappings of the given guest page.
3674 *
3675 * This is typically called when the host page backing the guest one has been
3676 * replaced or when the page protection was changed due to a guest access
3677 * caught by the monitoring.
3678 *
3679 * @returns VBox status code.
3680 * @retval VINF_SUCCESS if all references has been successfully cleared.
3681 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3682 * pool cleaning. FF and sync flags are set.
3683 *
3684 * @param pVM The cross context VM structure.
3685 * @param GCPhysPage GC physical address of the page in question
3686 * @param pPhysPage The guest page in question.
3687 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3688 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3689 * flushed, it is NOT touched if this isn't necessary.
3690 * The caller MUST initialized this to @a false.
3691 */
3692int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3693{
3694 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3695 PGM_LOCK_VOID(pVM);
3696 int rc = VINF_SUCCESS;
3697
3698#ifdef PGM_WITH_LARGE_PAGES
3699 /* Is this page part of a large page? */
3700 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3701 {
3702 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3703 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3704
3705 /* Fetch the large page base. */
3706 PPGMPAGE pLargePage;
3707 if (GCPhysBase != GCPhysPage)
3708 {
3709 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3710 AssertFatal(pLargePage);
3711 }
3712 else
3713 pLargePage = pPhysPage;
3714
3715 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3716
3717 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3718 {
3719 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3720 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3721 pVM->pgm.s.cLargePagesDisabled++;
3722
3723 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3724 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3725
3726 *pfFlushTLBs = true;
3727 PGM_UNLOCK(pVM);
3728 return rc;
3729 }
3730 }
3731#else
3732 NOREF(GCPhysPage);
3733#endif /* PGM_WITH_LARGE_PAGES */
3734
3735 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3736 if (u16)
3737 {
3738 /*
3739 * The zero page is currently screwing up the tracking and we'll
3740 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3741 * is defined, zero pages won't normally be mapped. Some kind of solution
3742 * will be needed for this problem of course, but it will have to wait...
3743 */
3744 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3745 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3746 rc = VINF_PGM_GCPHYS_ALIASED;
3747 else
3748 {
3749 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3750 {
3751 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3752 pgmPoolTrackFlushGCPhysPT(pVM,
3753 pPhysPage,
3754 fFlushPTEs,
3755 PGMPOOL_TD_GET_IDX(u16));
3756 }
3757 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3758 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3759 else
3760 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3761 *pfFlushTLBs = true;
3762 }
3763 }
3764
3765 if (rc == VINF_PGM_GCPHYS_ALIASED)
3766 {
3767 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3768 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3769 rc = VINF_PGM_SYNC_CR3;
3770 }
3771 PGM_UNLOCK(pVM);
3772 return rc;
3773}
3774
3775
3776/**
3777 * Scans all shadow page tables for mappings of a physical page.
3778 *
3779 * This may be slow, but it's most likely more efficient than cleaning
3780 * out the entire page pool / cache.
3781 *
3782 * @returns VBox status code.
3783 * @retval VINF_SUCCESS if all references has been successfully cleared.
3784 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3785 * a page pool cleaning.
3786 *
3787 * @param pVM The cross context VM structure.
3788 * @param pPhysPage The guest page in question.
3789 */
3790int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3791{
3792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3793 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3794 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3795 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3796
3797 /*
3798 * There is a limit to what makes sense.
3799 */
3800 if ( pPool->cPresent > 1024
3801 && pVM->cCpus == 1)
3802 {
3803 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3804 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3805 return VINF_PGM_GCPHYS_ALIASED;
3806 }
3807
3808 /*
3809 * Iterate all the pages until we've encountered all that in use.
3810 * This is simple but not quite optimal solution.
3811 */
3812 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3813 unsigned cLeft = pPool->cUsedPages;
3814 unsigned iPage = pPool->cCurPages;
3815 while (--iPage >= PGMPOOL_IDX_FIRST)
3816 {
3817 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3818 if ( pPage->GCPhys != NIL_RTGCPHYS
3819 && pPage->cPresent)
3820 {
3821 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3822 switch (pPage->enmKind)
3823 {
3824 /*
3825 * We only care about shadow page tables.
3826 */
3827 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3828 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3829 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3830 {
3831 const uint32_t u32 = (uint32_t)u64;
3832 unsigned cPresent = pPage->cPresent;
3833 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3834 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3835 {
3836 const X86PGUINT uPte = pPT->a[i].u;
3837 if (uPte & X86_PTE_P)
3838 {
3839 if ((uPte & X86_PTE_PG_MASK) == u32)
3840 {
3841 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3842 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3843
3844 /* Update the counter as we're removing references. */
3845 Assert(pPage->cPresent);
3846 Assert(pPool->cPresent);
3847 pPage->cPresent--;
3848 pPool->cPresent--;
3849 }
3850 if (!--cPresent)
3851 break;
3852 }
3853 }
3854 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3855 break;
3856 }
3857
3858 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3859 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3860 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3861 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3862 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3863 {
3864 unsigned cPresent = pPage->cPresent;
3865 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3866 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3867 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3868 {
3869 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3870 {
3871 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3872 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3873
3874 /* Update the counter as we're removing references. */
3875 Assert(pPage->cPresent);
3876 Assert(pPool->cPresent);
3877 pPage->cPresent--;
3878 pPool->cPresent--;
3879 }
3880 if (!--cPresent)
3881 break;
3882 }
3883 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3884 break;
3885 }
3886
3887 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3888 {
3889 unsigned cPresent = pPage->cPresent;
3890 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3891 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3892 {
3893 X86PGPAEUINT const uPte = pPT->a[i].u;
3894 if (uPte & EPT_E_READ)
3895 {
3896 if ((uPte & EPT_PTE_PG_MASK) == u64)
3897 {
3898 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3899 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3900
3901 /* Update the counter as we're removing references. */
3902 Assert(pPage->cPresent);
3903 Assert(pPool->cPresent);
3904 pPage->cPresent--;
3905 pPool->cPresent--;
3906 }
3907 if (!--cPresent)
3908 break;
3909 }
3910 }
3911 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3912 break;
3913 }
3914 }
3915
3916 if (!--cLeft)
3917 break;
3918 }
3919 }
3920
3921 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3922 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3923
3924 /*
3925 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3926 */
3927 if (pPool->cPresent > 1024)
3928 {
3929 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3930 return VINF_PGM_GCPHYS_ALIASED;
3931 }
3932
3933 return VINF_SUCCESS;
3934}
3935
3936
3937/**
3938 * Clears the user entry in a user table.
3939 *
3940 * This is used to remove all references to a page when flushing it.
3941 */
3942static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3943{
3944 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3945 Assert(pUser->iUser < pPool->cCurPages);
3946 uint32_t iUserTable = pUser->iUserTable;
3947
3948 /*
3949 * Map the user page. Ignore references made by fictitious pages.
3950 */
3951 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3952 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3953 union
3954 {
3955 uint64_t *pau64;
3956 uint32_t *pau32;
3957 } u;
3958 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3959 {
3960 Assert(!pUserPage->pvPageR3);
3961 return;
3962 }
3963 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3964
3965
3966 /* Safety precaution in case we change the paging for other modes too in the future. */
3967 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3968
3969#ifdef VBOX_STRICT
3970 /*
3971 * Some sanity checks.
3972 */
3973 switch (pUserPage->enmKind)
3974 {
3975 case PGMPOOLKIND_32BIT_PD:
3976 case PGMPOOLKIND_32BIT_PD_PHYS:
3977 Assert(iUserTable < X86_PG_ENTRIES);
3978 break;
3979 case PGMPOOLKIND_PAE_PDPT:
3980 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3981 case PGMPOOLKIND_PAE_PDPT_PHYS:
3982 Assert(iUserTable < 4);
3983 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3984 break;
3985 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3986 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3987 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3988 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3989 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3990 case PGMPOOLKIND_PAE_PD_PHYS:
3991 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3992 break;
3993 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3994 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3995 break;
3996 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3997 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3998 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3999 break;
4000 case PGMPOOLKIND_64BIT_PML4:
4001 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4002 /* GCPhys >> PAGE_SHIFT is the index here */
4003 break;
4004 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4005 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4006 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4007 break;
4008
4009 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4010 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4011 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4012 break;
4013
4014 case PGMPOOLKIND_ROOT_NESTED:
4015 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4016 break;
4017
4018# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4019 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4020 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4021 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4022 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4023 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4024 Assert(iUserTable < EPT_PG_ENTRIES);
4025 break;
4026# endif
4027
4028 default:
4029 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4030 break;
4031 }
4032#endif /* VBOX_STRICT */
4033
4034 /*
4035 * Clear the entry in the user page.
4036 */
4037 switch (pUserPage->enmKind)
4038 {
4039 /* 32-bit entries */
4040 case PGMPOOLKIND_32BIT_PD:
4041 case PGMPOOLKIND_32BIT_PD_PHYS:
4042 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4043 break;
4044
4045 /* 64-bit entries */
4046 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4047 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4048 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4049 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4050 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4051 case PGMPOOLKIND_PAE_PD_PHYS:
4052 case PGMPOOLKIND_PAE_PDPT_PHYS:
4053 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4054 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4055 case PGMPOOLKIND_64BIT_PML4:
4056 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4057 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4058 case PGMPOOLKIND_PAE_PDPT:
4059 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4060 case PGMPOOLKIND_ROOT_NESTED:
4061 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4062 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4063# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4064 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4065 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4066 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4067 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4068 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4069#endif
4070 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4071 break;
4072
4073 default:
4074 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4075 }
4076 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4077}
4078
4079
4080/**
4081 * Clears all users of a page.
4082 */
4083static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4084{
4085 /*
4086 * Free all the user records.
4087 */
4088 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4089
4090 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4091 uint16_t i = pPage->iUserHead;
4092 while (i != NIL_PGMPOOL_USER_INDEX)
4093 {
4094 /* Clear enter in user table. */
4095 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4096
4097 /* Free it. */
4098 const uint16_t iNext = paUsers[i].iNext;
4099 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4100 paUsers[i].iNext = pPool->iUserFreeHead;
4101 pPool->iUserFreeHead = i;
4102
4103 /* Next. */
4104 i = iNext;
4105 }
4106 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4107}
4108
4109
4110/**
4111 * Allocates a new physical cross reference extent.
4112 *
4113 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4114 * @param pVM The cross context VM structure.
4115 * @param piPhysExt Where to store the phys ext index.
4116 */
4117PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4118{
4119 PGM_LOCK_ASSERT_OWNER(pVM);
4120 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4121 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4122 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4123 {
4124 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4125 return NULL;
4126 }
4127 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4128 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4129 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4130 *piPhysExt = iPhysExt;
4131 return pPhysExt;
4132}
4133
4134
4135/**
4136 * Frees a physical cross reference extent.
4137 *
4138 * @param pVM The cross context VM structure.
4139 * @param iPhysExt The extent to free.
4140 */
4141void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4142{
4143 PGM_LOCK_ASSERT_OWNER(pVM);
4144 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4145 Assert(iPhysExt < pPool->cMaxPhysExts);
4146 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4147 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4148 {
4149 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4150 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4151 }
4152 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4153 pPool->iPhysExtFreeHead = iPhysExt;
4154}
4155
4156
4157/**
4158 * Frees a physical cross reference extent.
4159 *
4160 * @param pVM The cross context VM structure.
4161 * @param iPhysExt The extent to free.
4162 */
4163void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4164{
4165 PGM_LOCK_ASSERT_OWNER(pVM);
4166 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4167
4168 const uint16_t iPhysExtStart = iPhysExt;
4169 PPGMPOOLPHYSEXT pPhysExt;
4170 do
4171 {
4172 Assert(iPhysExt < pPool->cMaxPhysExts);
4173 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4174 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4175 {
4176 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4177 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4178 }
4179
4180 /* next */
4181 iPhysExt = pPhysExt->iNext;
4182 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4183
4184 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4185 pPool->iPhysExtFreeHead = iPhysExtStart;
4186}
4187
4188
4189/**
4190 * Insert a reference into a list of physical cross reference extents.
4191 *
4192 * @returns The new tracking data for PGMPAGE.
4193 *
4194 * @param pVM The cross context VM structure.
4195 * @param iPhysExt The physical extent index of the list head.
4196 * @param iShwPT The shadow page table index.
4197 * @param iPte Page table entry
4198 *
4199 */
4200static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4201{
4202 PGM_LOCK_ASSERT_OWNER(pVM);
4203 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4204 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4205
4206 /*
4207 * Special common cases.
4208 */
4209 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4210 {
4211 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4212 paPhysExts[iPhysExt].apte[1] = iPte;
4213 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4214 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4215 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4216 }
4217 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4218 {
4219 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4220 paPhysExts[iPhysExt].apte[2] = iPte;
4221 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4222 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4223 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4224 }
4225 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4226
4227 /*
4228 * General treatment.
4229 */
4230 const uint16_t iPhysExtStart = iPhysExt;
4231 unsigned cMax = 15;
4232 for (;;)
4233 {
4234 Assert(iPhysExt < pPool->cMaxPhysExts);
4235 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4236 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4237 {
4238 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4239 paPhysExts[iPhysExt].apte[i] = iPte;
4240 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4241 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4242 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4243 }
4244 if (!--cMax)
4245 {
4246 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4247 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4248 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4249 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4250 }
4251
4252 /* advance */
4253 iPhysExt = paPhysExts[iPhysExt].iNext;
4254 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4255 break;
4256 }
4257
4258 /*
4259 * Add another extent to the list.
4260 */
4261 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4262 if (!pNew)
4263 {
4264 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4265 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4266 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4267 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4268 }
4269 pNew->iNext = iPhysExtStart;
4270 pNew->aidx[0] = iShwPT;
4271 pNew->apte[0] = iPte;
4272 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4273 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4274}
4275
4276
4277/**
4278 * Add a reference to guest physical page where extents are in use.
4279 *
4280 * @returns The new tracking data for PGMPAGE.
4281 *
4282 * @param pVM The cross context VM structure.
4283 * @param pPhysPage Pointer to the aPages entry in the ram range.
4284 * @param u16 The ram range flags (top 16-bits).
4285 * @param iShwPT The shadow page table index.
4286 * @param iPte Page table entry
4287 */
4288uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4289{
4290 PGM_LOCK_VOID(pVM);
4291 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4292 {
4293 /*
4294 * Convert to extent list.
4295 */
4296 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4297 uint16_t iPhysExt;
4298 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4299 if (pPhysExt)
4300 {
4301 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4302 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4303 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4304 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4305 pPhysExt->aidx[1] = iShwPT;
4306 pPhysExt->apte[1] = iPte;
4307 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4308 }
4309 else
4310 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4311 }
4312 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4313 {
4314 /*
4315 * Insert into the extent list.
4316 */
4317 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4318 }
4319 else
4320 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4321 PGM_UNLOCK(pVM);
4322 return u16;
4323}
4324
4325
4326/**
4327 * Clear references to guest physical memory.
4328 *
4329 * @param pPool The pool.
4330 * @param pPage The page.
4331 * @param pPhysPage Pointer to the aPages entry in the ram range.
4332 * @param iPte Shadow PTE index
4333 */
4334void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4335{
4336 PVMCC pVM = pPool->CTX_SUFF(pVM);
4337 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4338 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4339
4340 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4341 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4342 {
4343 PGM_LOCK_VOID(pVM);
4344
4345 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4346 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4347 do
4348 {
4349 Assert(iPhysExt < pPool->cMaxPhysExts);
4350
4351 /*
4352 * Look for the shadow page and check if it's all freed.
4353 */
4354 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4355 {
4356 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4357 && paPhysExts[iPhysExt].apte[i] == iPte)
4358 {
4359 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4360 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4361
4362 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4363 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4364 {
4365 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4366 PGM_UNLOCK(pVM);
4367 return;
4368 }
4369
4370 /* we can free the node. */
4371 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4372 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4373 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4374 {
4375 /* lonely node */
4376 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4377 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4378 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4379 }
4380 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4381 {
4382 /* head */
4383 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4384 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4385 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4386 }
4387 else
4388 {
4389 /* in list */
4390 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4391 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4392 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4393 }
4394 iPhysExt = iPhysExtNext;
4395 PGM_UNLOCK(pVM);
4396 return;
4397 }
4398 }
4399
4400 /* next */
4401 iPhysExtPrev = iPhysExt;
4402 iPhysExt = paPhysExts[iPhysExt].iNext;
4403 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4404
4405 PGM_UNLOCK(pVM);
4406 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4407 }
4408 else /* nothing to do */
4409 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4410}
4411
4412/**
4413 * Clear references to guest physical memory.
4414 *
4415 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4416 * physical address is assumed to be correct, so the linear search can be
4417 * skipped and we can assert at an earlier point.
4418 *
4419 * @param pPool The pool.
4420 * @param pPage The page.
4421 * @param HCPhys The host physical address corresponding to the guest page.
4422 * @param GCPhys The guest physical address corresponding to HCPhys.
4423 * @param iPte Shadow PTE index
4424 */
4425static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4426{
4427 /*
4428 * Lookup the page and check if it checks out before derefing it.
4429 */
4430 PVMCC pVM = pPool->CTX_SUFF(pVM);
4431 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4432 if (pPhysPage)
4433 {
4434 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4435#ifdef LOG_ENABLED
4436 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4437 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4438#endif
4439 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4440 {
4441 Assert(pPage->cPresent);
4442 Assert(pPool->cPresent);
4443 pPage->cPresent--;
4444 pPool->cPresent--;
4445 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4446 return;
4447 }
4448
4449 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4450 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4451 }
4452 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4453}
4454
4455
4456/**
4457 * Clear references to guest physical memory.
4458 *
4459 * @param pPool The pool.
4460 * @param pPage The page.
4461 * @param HCPhys The host physical address corresponding to the guest page.
4462 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4463 * @param iPte Shadow pte index
4464 */
4465void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4466{
4467 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4468
4469 /*
4470 * Try the hint first.
4471 */
4472 RTHCPHYS HCPhysHinted;
4473 PVMCC pVM = pPool->CTX_SUFF(pVM);
4474 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4475 if (pPhysPage)
4476 {
4477 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4478 Assert(HCPhysHinted);
4479 if (HCPhysHinted == HCPhys)
4480 {
4481 Assert(pPage->cPresent);
4482 Assert(pPool->cPresent);
4483 pPage->cPresent--;
4484 pPool->cPresent--;
4485 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4486 return;
4487 }
4488 }
4489 else
4490 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4491
4492 /*
4493 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4494 */
4495 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4496 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4497 while (pRam)
4498 {
4499 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4500 while (iPage-- > 0)
4501 {
4502 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4503 {
4504 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4505 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4506 Assert(pPage->cPresent);
4507 Assert(pPool->cPresent);
4508 pPage->cPresent--;
4509 pPool->cPresent--;
4510 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4511 return;
4512 }
4513 }
4514 pRam = pRam->CTX_SUFF(pNext);
4515 }
4516
4517 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4518}
4519
4520
4521/**
4522 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4523 *
4524 * @param pPool The pool.
4525 * @param pPage The page.
4526 * @param pShwPT The shadow page table (mapping of the page).
4527 * @param pGstPT The guest page table.
4528 */
4529DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4530{
4531 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4532 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4533 {
4534 const X86PGUINT uPte = pShwPT->a[i].u;
4535 Assert(!(uPte & RT_BIT_32(10)));
4536 if (uPte & X86_PTE_P)
4537 {
4538 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4539 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4540 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4541 if (!pPage->cPresent)
4542 break;
4543 }
4544 }
4545}
4546
4547
4548/**
4549 * Clear references to guest physical memory in a PAE / 32-bit page table.
4550 *
4551 * @param pPool The pool.
4552 * @param pPage The page.
4553 * @param pShwPT The shadow page table (mapping of the page).
4554 * @param pGstPT The guest page table (just a half one).
4555 */
4556DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4557{
4558 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4559 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4560 {
4561 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4562 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4563 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4564 {
4565 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4566 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4567 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4568 if (!pPage->cPresent)
4569 break;
4570 }
4571 }
4572}
4573
4574
4575/**
4576 * Clear references to guest physical memory in a PAE / PAE page table.
4577 *
4578 * @param pPool The pool.
4579 * @param pPage The page.
4580 * @param pShwPT The shadow page table (mapping of the page).
4581 * @param pGstPT The guest page table.
4582 */
4583DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4584{
4585 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4586 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4587 {
4588 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4589 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4590 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4591 {
4592 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4593 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4594 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4595 if (!pPage->cPresent)
4596 break;
4597 }
4598 }
4599}
4600
4601
4602/**
4603 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4604 *
4605 * @param pPool The pool.
4606 * @param pPage The page.
4607 * @param pShwPT The shadow page table (mapping of the page).
4608 */
4609DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4610{
4611 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4612 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4613 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4614 {
4615 const X86PGUINT uPte = pShwPT->a[i].u;
4616 Assert(!(uPte & RT_BIT_32(10)));
4617 if (uPte & X86_PTE_P)
4618 {
4619 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4620 i, uPte & X86_PTE_PG_MASK, GCPhys));
4621 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4622 if (!pPage->cPresent)
4623 break;
4624 }
4625 }
4626}
4627
4628
4629/**
4630 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4631 *
4632 * @param pPool The pool.
4633 * @param pPage The page.
4634 * @param pShwPT The shadow page table (mapping of the page).
4635 */
4636DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4637{
4638 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4639 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4640 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4641 {
4642 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4643 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4644 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4645 {
4646 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4647 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4648 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4649 if (!pPage->cPresent)
4650 break;
4651 }
4652 }
4653}
4654
4655
4656/**
4657 * Clear references to shadowed pages in an EPT page table.
4658 *
4659 * @param pPool The pool.
4660 * @param pPage The page.
4661 * @param pShwPT The shadow page directory pointer table (mapping of the
4662 * page).
4663 */
4664DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4665{
4666 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4667 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4668 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4669 {
4670 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4671 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4672 if (uPte & EPT_E_READ)
4673 {
4674 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4675 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4676 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4677 if (!pPage->cPresent)
4678 break;
4679 }
4680 }
4681}
4682
4683#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4684
4685/**
4686 * Clears references to shadowed pages in a SLAT EPT page table.
4687 *
4688 * @param pPool The pool.
4689 * @param pPage The page.
4690 * @param pShwPT The shadow page table (mapping of the page).
4691 * @param pGstPT The guest page table.
4692 */
4693DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4694{
4695 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4696 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4697 {
4698 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4699 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4700 if (uShwPte & EPT_PRESENT_MASK)
4701 {
4702 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4703 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4704 if (!pPage->cPresent)
4705 break;
4706 }
4707 }
4708}
4709
4710
4711/**
4712 * Clear references to guest physical memory in a SLAT 2MB EPT page table.
4713 *
4714 * @param pPool The pool.
4715 * @param pPage The page.
4716 * @param pShwPT The shadow page table (mapping of the page).
4717 */
4718DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT2MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4719{
4720 Assert(pPage->fA20Enabled);
4721 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4722 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4723 {
4724 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4725 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4726 if (uShwPte & EPT_PRESENT_MASK)
4727 {
4728 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, GCPhys));
4729 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, GCPhys, i);
4730 if (!pPage->cPresent)
4731 break;
4732 }
4733 }
4734}
4735
4736
4737/**
4738 * Clear references to shadowed pages in a SLAT EPT page directory.
4739 *
4740 * @param pPool The pool.
4741 * @param pPage The page.
4742 * @param pShwPD The shadow page directory (mapping of the page).
4743 * @param pGstPD The guest page directory.
4744 */
4745DECLINLINE(void) pgmPoolTrackDerefNestedPDEpt(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD, PCEPTPD pGstPD)
4746{
4747 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4748 {
4749 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4750#ifdef PGM_WITH_LARGE_PAGES
4751 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4752#else
4753 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4754#endif
4755 if (uPde & EPT_PRESENT_MASK)
4756 {
4757#ifdef PGM_WITH_LARGE_PAGES
4758 if (uPde & EPT_E_LEAF)
4759 {
4760 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n", i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4761 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK, pGstPD->a[i].u & EPT_PDE2M_PG_MASK, i);
4762 }
4763 else
4764#endif
4765 {
4766 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4767 if (pSubPage)
4768 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4769 else
4770 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4771 }
4772 }
4773 }
4774}
4775
4776#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4777
4778
4779/**
4780 * Clear references to shadowed pages in a 32 bits page directory.
4781 *
4782 * @param pPool The pool.
4783 * @param pPage The page.
4784 * @param pShwPD The shadow page directory (mapping of the page).
4785 */
4786DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4787{
4788 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4789 {
4790 X86PGUINT const uPde = pShwPD->a[i].u;
4791 if (uPde & X86_PDE_P)
4792 {
4793 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4794 if (pSubPage)
4795 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4796 else
4797 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4798 }
4799 }
4800}
4801
4802
4803/**
4804 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4805 *
4806 * @param pPool The pool.
4807 * @param pPage The page.
4808 * @param pShwPD The shadow page directory (mapping of the page).
4809 */
4810DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4811{
4812 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4813 {
4814 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4815 if (uPde & X86_PDE_P)
4816 {
4817#ifdef PGM_WITH_LARGE_PAGES
4818 if (uPde & X86_PDE_PS)
4819 {
4820 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4821 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4822 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4823 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4824 i);
4825 }
4826 else
4827#endif
4828 {
4829 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4830 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4831 if (pSubPage)
4832 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4833 else
4834 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4835 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4836 }
4837 }
4838 }
4839}
4840
4841
4842/**
4843 * Clear references to shadowed pages in a PAE page directory pointer table.
4844 *
4845 * @param pPool The pool.
4846 * @param pPage The page.
4847 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4848 */
4849DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4850{
4851 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4852 {
4853 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4854 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4855 if (uPdpe & X86_PDPE_P)
4856 {
4857 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4858 if (pSubPage)
4859 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4860 else
4861 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4862 }
4863 }
4864}
4865
4866
4867/**
4868 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4869 *
4870 * @param pPool The pool.
4871 * @param pPage The page.
4872 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4873 */
4874DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4875{
4876 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4877 {
4878 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4879 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4880 if (uPdpe & X86_PDPE_P)
4881 {
4882 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4883 if (pSubPage)
4884 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4885 else
4886 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4887 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4888 }
4889 }
4890}
4891
4892
4893/**
4894 * Clear references to shadowed pages in a 64-bit level 4 page table.
4895 *
4896 * @param pPool The pool.
4897 * @param pPage The page.
4898 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4899 */
4900DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4901{
4902 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4903 {
4904 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4905 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4906 if (uPml4e & X86_PML4E_P)
4907 {
4908 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4909 if (pSubPage)
4910 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4911 else
4912 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4913 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4914 }
4915 }
4916}
4917
4918
4919/**
4920 * Clear references to shadowed pages in an EPT page directory.
4921 *
4922 * @param pPool The pool.
4923 * @param pPage The page.
4924 * @param pShwPD The shadow page directory (mapping of the page).
4925 */
4926DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4927{
4928 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4929 {
4930 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4931#ifdef PGM_WITH_LARGE_PAGES
4932 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4933#else
4934 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4935#endif
4936 if (uPde & EPT_E_READ)
4937 {
4938#ifdef PGM_WITH_LARGE_PAGES
4939 if (uPde & EPT_E_LEAF)
4940 {
4941 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4942 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4943 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4944 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4945 i);
4946 }
4947 else
4948#endif
4949 {
4950 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4951 if (pSubPage)
4952 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4953 else
4954 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4955 }
4956 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4957 }
4958 }
4959}
4960
4961
4962/**
4963 * Clear references to shadowed pages in an EPT page directory pointer table.
4964 *
4965 * @param pPool The pool.
4966 * @param pPage The page.
4967 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4968 */
4969DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4970{
4971 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4972 {
4973 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4974 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4975 if (uPdpe & EPT_E_READ)
4976 {
4977 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4978 if (pSubPage)
4979 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4980 else
4981 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4982 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4983 }
4984 }
4985}
4986
4987
4988/**
4989 * Clears all references made by this page.
4990 *
4991 * This includes other shadow pages and GC physical addresses.
4992 *
4993 * @param pPool The pool.
4994 * @param pPage The page.
4995 */
4996static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4997{
4998 /*
4999 * Map the shadow page and take action according to the page kind.
5000 */
5001 PVMCC pVM = pPool->CTX_SUFF(pVM);
5002 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5003 switch (pPage->enmKind)
5004 {
5005 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5006 {
5007 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5008 void *pvGst;
5009 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5010 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
5011 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5012 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5013 break;
5014 }
5015
5016 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5017 {
5018 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5019 void *pvGst;
5020 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5021 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
5022 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5023 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5024 break;
5025 }
5026
5027 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5028 {
5029 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5030 void *pvGst;
5031 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5032 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
5033 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5034 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5035 break;
5036 }
5037
5038 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
5039 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5040 {
5041 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5042 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
5043 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5044 break;
5045 }
5046
5047 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
5048 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5049 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5050 {
5051 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5052 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
5053 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5054 break;
5055 }
5056
5057 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5058 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5059 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5060 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5061 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5062 case PGMPOOLKIND_PAE_PD_PHYS:
5063 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5064 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5065 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
5066 break;
5067
5068 case PGMPOOLKIND_32BIT_PD_PHYS:
5069 case PGMPOOLKIND_32BIT_PD:
5070 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
5071 break;
5072
5073 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5074 case PGMPOOLKIND_PAE_PDPT:
5075 case PGMPOOLKIND_PAE_PDPT_PHYS:
5076 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5077 break;
5078
5079 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5080 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5081 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5082 break;
5083
5084 case PGMPOOLKIND_64BIT_PML4:
5085 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5086 break;
5087
5088 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5089 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5090 break;
5091
5092 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5093 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5094 break;
5095
5096 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5097 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5098 break;
5099
5100#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5101 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5102 {
5103 void *pvGst;
5104 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5105 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5106 break;
5107 }
5108
5109 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5110 pgmPoolTrackDerefNestedPTEPT2MB(pPool, pPage, (PEPTPT)pvShw);
5111 break;
5112
5113 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5114 {
5115 void *pvGst;
5116 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5117 pgmPoolTrackDerefNestedPDEpt(pPool, pPage, (PEPTPD)pvShw, (PCEPTPD)pvGst);
5118 break;
5119 }
5120
5121 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5122 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5123 break;
5124#endif
5125
5126 default:
5127 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5128 }
5129
5130 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
5131 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5132 ASMMemZeroPage(pvShw);
5133 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5134 pPage->fZeroed = true;
5135 Assert(!pPage->cPresent);
5136 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5137}
5138
5139
5140/**
5141 * Flushes a pool page.
5142 *
5143 * This moves the page to the free list after removing all user references to it.
5144 *
5145 * @returns VBox status code.
5146 * @retval VINF_SUCCESS on success.
5147 * @param pPool The pool.
5148 * @param pPage The shadow page.
5149 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
5150 */
5151int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5152{
5153 PVMCC pVM = pPool->CTX_SUFF(pVM);
5154 bool fFlushRequired = false;
5155
5156 int rc = VINF_SUCCESS;
5157 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5158 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5159 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5160
5161 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5162 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5163 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5164
5165 /*
5166 * Reject any attempts at flushing any of the special root pages (shall
5167 * not happen).
5168 */
5169 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5170 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5171 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5172 VINF_SUCCESS);
5173
5174 PGM_LOCK_VOID(pVM);
5175
5176 /*
5177 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5178 */
5179 if (pgmPoolIsPageLocked(pPage))
5180 {
5181#if !defined(VBOX_VMM_TARGET_ARMV8)
5182 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5183 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5184 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5185 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5186 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5187 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5188 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5189 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5190 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5191 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5192 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5193#endif
5194 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5195 PGM_UNLOCK(pVM);
5196 return VINF_SUCCESS;
5197 }
5198
5199 /*
5200 * Mark the page as being in need of an ASMMemZeroPage().
5201 */
5202 pPage->fZeroed = false;
5203
5204#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5205 if (pPage->fDirty)
5206 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5207#endif
5208
5209 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5210 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5211 fFlushRequired = true;
5212
5213 /*
5214 * Clear the page.
5215 */
5216 pgmPoolTrackClearPageUsers(pPool, pPage);
5217 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5218 pgmPoolTrackDeref(pPool, pPage);
5219 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5220
5221 /*
5222 * Flush it from the cache.
5223 */
5224 pgmPoolCacheFlushPage(pPool, pPage);
5225
5226 /*
5227 * Deregistering the monitoring.
5228 */
5229 if (pPage->fMonitored)
5230 rc = pgmPoolMonitorFlush(pPool, pPage);
5231
5232 /*
5233 * Free the page.
5234 */
5235 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5236 pPage->iNext = pPool->iFreeHead;
5237 pPool->iFreeHead = pPage->idx;
5238 pPage->enmKind = PGMPOOLKIND_FREE;
5239 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5240 pPage->GCPhys = NIL_RTGCPHYS;
5241 pPage->fReusedFlushPending = false;
5242
5243 pPool->cUsedPages--;
5244
5245 /* Flush the TLBs of all VCPUs if required. */
5246 if ( fFlushRequired
5247 && fFlush)
5248 {
5249 PGM_INVL_ALL_VCPU_TLBS(pVM);
5250 }
5251
5252 PGM_UNLOCK(pVM);
5253 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5254 return rc;
5255}
5256
5257
5258/**
5259 * Frees a usage of a pool page.
5260 *
5261 * The caller is responsible to updating the user table so that it no longer
5262 * references the shadow page.
5263 *
5264 * @param pPool The pool.
5265 * @param pPage The shadow page.
5266 * @param iUser The shadow page pool index of the user table.
5267 * NIL_PGMPOOL_IDX for root pages.
5268 * @param iUserTable The index into the user table (shadowed). Ignored if
5269 * root page.
5270 */
5271void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5272{
5273 PVMCC pVM = pPool->CTX_SUFF(pVM);
5274
5275 STAM_PROFILE_START(&pPool->StatFree, a);
5276 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5277 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5278 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5279
5280 PGM_LOCK_VOID(pVM);
5281 if (iUser != NIL_PGMPOOL_IDX)
5282 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5283 if (!pPage->fCached)
5284 pgmPoolFlushPage(pPool, pPage);
5285 PGM_UNLOCK(pVM);
5286 STAM_PROFILE_STOP(&pPool->StatFree, a);
5287}
5288
5289
5290/**
5291 * Makes one or more free page free.
5292 *
5293 * @returns VBox status code.
5294 * @retval VINF_SUCCESS on success.
5295 *
5296 * @param pPool The pool.
5297 * @param enmKind Page table kind
5298 * @param iUser The user of the page.
5299 */
5300static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5301{
5302 PVMCC pVM = pPool->CTX_SUFF(pVM);
5303 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5304 NOREF(enmKind);
5305
5306 /*
5307 * If the pool isn't full grown yet, expand it.
5308 */
5309 if (pPool->cCurPages < pPool->cMaxPages)
5310 {
5311 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5312#ifdef IN_RING3
5313 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5314#else
5315 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5316#endif
5317 if (RT_FAILURE(rc))
5318 return rc;
5319 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5320 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5321 return VINF_SUCCESS;
5322 }
5323
5324 /*
5325 * Free one cached page.
5326 */
5327 return pgmPoolCacheFreeOne(pPool, iUser);
5328}
5329
5330
5331/**
5332 * Allocates a page from the pool.
5333 *
5334 * This page may actually be a cached page and not in need of any processing
5335 * on the callers part.
5336 *
5337 * @returns VBox status code.
5338 * @retval VINF_SUCCESS if a NEW page was allocated.
5339 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5340 *
5341 * @param pVM The cross context VM structure.
5342 * @param GCPhys The GC physical address of the page we're gonna shadow.
5343 * For 4MB and 2MB PD entries, it's the first address the
5344 * shadow PT is covering.
5345 * @param enmKind The kind of mapping.
5346 * @param enmAccess Access type for the mapping (only relevant for big pages)
5347 * @param fA20Enabled Whether the A20 gate is enabled or not.
5348 * @param iUser The shadow page pool index of the user table. Root
5349 * pages should pass NIL_PGMPOOL_IDX.
5350 * @param iUserTable The index into the user table (shadowed). Ignored for
5351 * root pages (iUser == NIL_PGMPOOL_IDX).
5352 * @param fLockPage Lock the page
5353 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5354 */
5355int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5356 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5357{
5358 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5359 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5360 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5361 *ppPage = NULL;
5362 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5363 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5364 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5365
5366#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5367 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5368 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5369#endif
5370
5371 PGM_LOCK_VOID(pVM);
5372
5373 if (pPool->fCacheEnabled)
5374 {
5375 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5376 if (RT_SUCCESS(rc2))
5377 {
5378 if (fLockPage)
5379 pgmPoolLockPage(pPool, *ppPage);
5380 PGM_UNLOCK(pVM);
5381 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5382 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5383 return rc2;
5384 }
5385 }
5386
5387 /*
5388 * Allocate a new one.
5389 */
5390 int rc = VINF_SUCCESS;
5391 uint16_t iNew = pPool->iFreeHead;
5392 if (iNew == NIL_PGMPOOL_IDX)
5393 {
5394 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5395 if (RT_FAILURE(rc))
5396 {
5397 PGM_UNLOCK(pVM);
5398 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5399 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5400 return rc;
5401 }
5402 iNew = pPool->iFreeHead;
5403 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5404 }
5405
5406 /* unlink the free head */
5407 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5408 pPool->iFreeHead = pPage->iNext;
5409 pPage->iNext = NIL_PGMPOOL_IDX;
5410
5411 /*
5412 * Initialize it.
5413 */
5414 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5415 pPage->enmKind = enmKind;
5416 pPage->enmAccess = enmAccess;
5417 pPage->GCPhys = GCPhys;
5418 pPage->fA20Enabled = fA20Enabled;
5419 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5420 pPage->fMonitored = false;
5421 pPage->fCached = false;
5422 pPage->fDirty = false;
5423 pPage->fReusedFlushPending = false;
5424 pPage->cModifications = 0;
5425 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5426 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5427 pPage->cPresent = 0;
5428 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5429 pPage->idxDirtyEntry = 0;
5430 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5431 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5432 pPage->cLastAccessHandler = 0;
5433 pPage->cLocked = 0;
5434# ifdef VBOX_STRICT
5435 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5436# endif
5437
5438 /*
5439 * Insert into the tracking and cache. If this fails, free the page.
5440 */
5441 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5442 if (RT_FAILURE(rc3))
5443 {
5444 pPool->cUsedPages--;
5445 pPage->enmKind = PGMPOOLKIND_FREE;
5446 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5447 pPage->GCPhys = NIL_RTGCPHYS;
5448 pPage->iNext = pPool->iFreeHead;
5449 pPool->iFreeHead = pPage->idx;
5450 PGM_UNLOCK(pVM);
5451 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5452 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5453 return rc3;
5454 }
5455
5456 /*
5457 * Commit the allocation, clear the page and return.
5458 */
5459#ifdef VBOX_WITH_STATISTICS
5460 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5461 pPool->cUsedPagesHigh = pPool->cUsedPages;
5462#endif
5463
5464 if (!pPage->fZeroed)
5465 {
5466 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5467 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5468 ASMMemZeroPage(pv);
5469 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5470 }
5471
5472 *ppPage = pPage;
5473 if (fLockPage)
5474 pgmPoolLockPage(pPool, pPage);
5475 PGM_UNLOCK(pVM);
5476 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5477 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5478 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5479 return rc;
5480}
5481
5482
5483/**
5484 * Frees a usage of a pool page.
5485 *
5486 * @param pVM The cross context VM structure.
5487 * @param HCPhys The HC physical address of the shadow page.
5488 * @param iUser The shadow page pool index of the user table.
5489 * NIL_PGMPOOL_IDX if root page.
5490 * @param iUserTable The index into the user table (shadowed). Ignored if
5491 * root page.
5492 */
5493void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5494{
5495 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5496 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5497 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5498}
5499
5500
5501/**
5502 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5503 *
5504 * @returns Pointer to the shadow page structure.
5505 * @param pPool The pool.
5506 * @param HCPhys The HC physical address of the shadow page.
5507 */
5508PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5509{
5510 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5511
5512 /*
5513 * Look up the page.
5514 */
5515 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5516
5517 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5518 return pPage;
5519}
5520
5521
5522/**
5523 * Internal worker for finding a page for debugging purposes, no assertions.
5524 *
5525 * @returns Pointer to the shadow page structure. NULL on if not found.
5526 * @param pPool The pool.
5527 * @param HCPhys The HC physical address of the shadow page.
5528 */
5529PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5530{
5531 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5532 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5533}
5534
5535
5536/**
5537 * Internal worker for PGM_HCPHYS_2_PTR.
5538 *
5539 * @returns VBox status code.
5540 * @param pVM The cross context VM structure.
5541 * @param HCPhys The HC physical address of the shadow page.
5542 * @param ppv Where to return the address.
5543 */
5544int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5545{
5546 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5547 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5548 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5549 VERR_PGM_POOL_GET_PAGE_FAILED);
5550 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5551 return VINF_SUCCESS;
5552}
5553
5554#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5555
5556/**
5557 * Flush the specified page if present
5558 *
5559 * @param pVM The cross context VM structure.
5560 * @param GCPhys Guest physical address of the page to flush
5561 */
5562void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5563{
5564 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5565
5566 VM_ASSERT_EMT(pVM);
5567
5568 /*
5569 * Look up the GCPhys in the hash.
5570 */
5571 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5572 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5573 if (i == NIL_PGMPOOL_IDX)
5574 return;
5575
5576 do
5577 {
5578 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5579 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5580 {
5581 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5582 switch (pPage->enmKind)
5583 {
5584 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5585 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5586 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5587 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5588 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5589 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5590 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5591 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5592 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5593 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5594 case PGMPOOLKIND_64BIT_PML4:
5595 case PGMPOOLKIND_32BIT_PD:
5596 case PGMPOOLKIND_PAE_PDPT:
5597 {
5598 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5599# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5600 if (pPage->fDirty)
5601 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5602 else
5603# endif
5604 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5605 Assert(!pgmPoolIsPageLocked(pPage));
5606 pgmPoolMonitorChainFlush(pPool, pPage);
5607 return;
5608 }
5609
5610 /* ignore, no monitoring. */
5611 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5612 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5613 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5614 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5615 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5616 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5617 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5618 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5619 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5620 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5621 case PGMPOOLKIND_ROOT_NESTED:
5622 case PGMPOOLKIND_PAE_PD_PHYS:
5623 case PGMPOOLKIND_PAE_PDPT_PHYS:
5624 case PGMPOOLKIND_32BIT_PD_PHYS:
5625 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5626 break;
5627
5628 default:
5629 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5630 }
5631 }
5632
5633 /* next */
5634 i = pPage->iNext;
5635 } while (i != NIL_PGMPOOL_IDX);
5636 return;
5637}
5638
5639
5640/**
5641 * Reset CPU on hot plugging.
5642 *
5643 * @param pVM The cross context VM structure.
5644 * @param pVCpu The cross context virtual CPU structure.
5645 */
5646void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5647{
5648 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5649
5650 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5651 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5652 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5653}
5654
5655
5656/**
5657 * Flushes the entire cache.
5658 *
5659 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5660 * this and execute this CR3 flush.
5661 *
5662 * @param pVM The cross context VM structure.
5663 */
5664void pgmR3PoolReset(PVM pVM)
5665{
5666 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5667
5668 PGM_LOCK_ASSERT_OWNER(pVM);
5669 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5670 LogFlow(("pgmR3PoolReset:\n"));
5671
5672 /*
5673 * If there are no pages in the pool, there is nothing to do.
5674 */
5675 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5676 {
5677 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5678 return;
5679 }
5680
5681 /*
5682 * Exit the shadow mode since we're going to clear everything,
5683 * including the root page.
5684 */
5685 VMCC_FOR_EACH_VMCPU(pVM)
5686 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5687 VMCC_FOR_EACH_VMCPU_END(pVM);
5688
5689
5690 /*
5691 * Nuke the free list and reinsert all pages into it.
5692 */
5693 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5694 {
5695 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5696
5697 if (pPage->fMonitored)
5698 pgmPoolMonitorFlush(pPool, pPage);
5699 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5700 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5701 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5702 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5703 pPage->GCPhys = NIL_RTGCPHYS;
5704 pPage->enmKind = PGMPOOLKIND_FREE;
5705 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5706 Assert(pPage->idx == i);
5707 pPage->iNext = i + 1;
5708 pPage->fA20Enabled = true;
5709 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5710 pPage->fSeenNonGlobal = false;
5711 pPage->fMonitored = false;
5712 pPage->fDirty = false;
5713 pPage->fCached = false;
5714 pPage->fReusedFlushPending = false;
5715 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5716 pPage->cPresent = 0;
5717 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5718 pPage->cModifications = 0;
5719 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5720 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5721 pPage->idxDirtyEntry = 0;
5722 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5723 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5724 pPage->cLastAccessHandler = 0;
5725 pPage->cLocked = 0;
5726# ifdef VBOX_STRICT
5727 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5728# endif
5729 }
5730 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5731 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5732 pPool->cUsedPages = 0;
5733
5734 /*
5735 * Zap and reinitialize the user records.
5736 */
5737 pPool->cPresent = 0;
5738 pPool->iUserFreeHead = 0;
5739 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5740 const unsigned cMaxUsers = pPool->cMaxUsers;
5741 for (unsigned i = 0; i < cMaxUsers; i++)
5742 {
5743 paUsers[i].iNext = i + 1;
5744 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5745 paUsers[i].iUserTable = 0xfffffffe;
5746 }
5747 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5748
5749 /*
5750 * Clear all the GCPhys links and rebuild the phys ext free list.
5751 */
5752 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5753 pRam;
5754 pRam = pRam->CTX_SUFF(pNext))
5755 {
5756 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5757 while (iPage-- > 0)
5758 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5759 }
5760
5761 pPool->iPhysExtFreeHead = 0;
5762 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5763 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5764 for (unsigned i = 0; i < cMaxPhysExts; i++)
5765 {
5766 paPhysExts[i].iNext = i + 1;
5767 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5768 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5769 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5770 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5771 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5772 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5773 }
5774 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5775
5776 /*
5777 * Just zap the modified list.
5778 */
5779 pPool->cModifiedPages = 0;
5780 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5781
5782 /*
5783 * Clear the GCPhys hash and the age list.
5784 */
5785 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5786 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5787 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5788 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5789
5790# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5791 /* Clear all dirty pages. */
5792 pPool->idxFreeDirtyPage = 0;
5793 pPool->cDirtyPages = 0;
5794 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5795 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5796# endif
5797
5798 /*
5799 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5800 */
5801 VMCC_FOR_EACH_VMCPU(pVM)
5802 {
5803 /*
5804 * Re-enter the shadowing mode and assert Sync CR3 FF.
5805 */
5806 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5807 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5808 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5809 }
5810 VMCC_FOR_EACH_VMCPU_END(pVM);
5811
5812 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5813}
5814
5815#endif /* IN_RING3 */
5816
5817#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5818/**
5819 * Stringifies a PGMPOOLKIND value.
5820 */
5821static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5822{
5823 switch ((PGMPOOLKIND)enmKind)
5824 {
5825 case PGMPOOLKIND_INVALID:
5826 return "PGMPOOLKIND_INVALID";
5827 case PGMPOOLKIND_FREE:
5828 return "PGMPOOLKIND_FREE";
5829 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5830 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5831 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5832 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5833 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5834 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5835 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5836 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5837 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5838 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5839 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5840 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5841 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5842 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5843 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5844 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5845 case PGMPOOLKIND_32BIT_PD:
5846 return "PGMPOOLKIND_32BIT_PD";
5847 case PGMPOOLKIND_32BIT_PD_PHYS:
5848 return "PGMPOOLKIND_32BIT_PD_PHYS";
5849 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5850 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5851 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5852 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5853 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5854 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5855 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5856 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5857 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5858 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5859 case PGMPOOLKIND_PAE_PD_PHYS:
5860 return "PGMPOOLKIND_PAE_PD_PHYS";
5861 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5862 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5863 case PGMPOOLKIND_PAE_PDPT:
5864 return "PGMPOOLKIND_PAE_PDPT";
5865 case PGMPOOLKIND_PAE_PDPT_PHYS:
5866 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5867 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5868 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5869 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5870 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5871 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5872 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5873 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5874 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5875 case PGMPOOLKIND_64BIT_PML4:
5876 return "PGMPOOLKIND_64BIT_PML4";
5877 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5878 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5879 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5880 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5881 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5882 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5883 case PGMPOOLKIND_ROOT_NESTED:
5884 return "PGMPOOLKIND_ROOT_NESTED";
5885 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5886 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5887 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5888 return "PGMPOOLKIND_EPT_PT_FOR_EPT_2MB";
5889 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5890 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5891 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5892 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5893 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5894 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5895 }
5896 return "Unknown kind!";
5897}
5898#endif /* LOG_ENABLED || VBOX_STRICT */
5899
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette