VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 2257

Last change on this file since 2257 was 2236, checked in by vboxsync, 18 years ago

Incorrect assertion. (guest ram can have unallocated gaps)

  • Property svn:keywords set to Id
File size: 123.3 KB
1/* $Id: PGMAllPool.cpp 2236 2007-04-19 15:33:39Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 return pVM->pgm.s.apGCPaePDs[0];
115 case PGMPOOL_IDX_PDPTR:
116 return pVM->pgm.s.pGCPaePDPTR;
117 case PGMPOOL_IDX_PML4:
118 return pVM->pgm.s.pGCPaePML4;
119 default:
120 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
121 return NULL;
122 }
123}
124#endif /* IN_GC */
125
126
127#ifdef PGMPOOL_WITH_MONITORING
128/**
129 * Determine the size of a write instruction.
130 * @returns number of bytes written.
131 * @param pDis The disassembler state.
132 */
133static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
134{
135 /*
136 * This is very crude and possibly wrong for some opcodes,
137 * but since it's not really supposed to be called we can
138 * probably live with that.
139 */
140 return DISGetParamSize(pDis, &pDis->param1);
141}
142
143
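/*
 * Note on the monitor chain: shadow pages that monitor the same guest page are
 * linked into a doubly linked list through their iMonitoredNext / iMonitoredPrev
 * indices, terminated by NIL_PGMPOOL_IDX. The head of the chain is the page
 * registered as the access handler's user argument and the one used for
 * recording modifications.
 */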
144/**
145 * Flushes a chain of pages sharing the same access monitor.
146 *
147 * @returns VBox status code suitable for scheduling.
148 * @param pPool The pool.
149 * @param pPage A page in the chain.
150 */
151int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
152{
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Iterate the list, flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
179 rc = VINF_PGM_SYNC_CR3;
180 }
181 /* next */
182 if (idx == NIL_PGMPOOL_IDX)
183 break;
184 pPage = &pPool->aPages[idx];
185 }
186 return rc;
187}
188
189
190/**
191 * Wrapper for getting the current context pointer to the entry being modified.
192 *
193 * @returns Pointer to the current context mapping of the entry.
194 * @param pPool The pool.
195 * @param pvFault The fault virtual address.
196 * @param GCPhysFault The fault physical address.
197 * @param cbEntry The entry size.
198 */
199#ifdef IN_RING3
200DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
201#else
202DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
203#endif
204{
205#ifdef IN_GC
206 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
207
208#elif defined(IN_RING0)
209 void *pvRet;
210 int rc = PGMRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
211 AssertFatalRCSuccess(rc);
212 return pvRet;
213
214#elif defined(IN_RING3)
215 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
216#else
217# error "huh?"
218#endif
219}
220
221
222/**
223 * Process shadow entries before they are changed by the guest.
224 *
225 * For PT entries we will clear them. For PD entries, we'll simply check
226 * for mapping conflicts and set the SyncCR3 FF if found.
227 *
228 * @param pPool The pool.
229 * @param pPage The head page.
230 * @param GCPhysFault The guest physical fault address.
231 * @param uAddress In R0 and GC this is the guest context fault address (flat).
232 * In R3 this is the host context 'fault' address.
233 * @param pCpu The disassembler state for figuring out the write size.
234 * This need not be specified if the caller knows we won't do cross entry accesses.
235 */
236#ifdef IN_RING3
237void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
238#else
239void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
240#endif
241{
242 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
243 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
244 for (;;)
245 {
246 union
247 {
248 void *pv;
249 PX86PT pPT;
250 PX86PTPAE pPTPae;
251 PX86PD pPD;
252 PX86PDPAE pPDPae;
253 } uShw;
254 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
255
256 switch (pPage->enmKind)
257 {
258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
259 {
260 const unsigned iShw = off / sizeof(X86PTE);
261 if (uShw.pPT->a[iShw].n.u1Present)
262 {
263# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
264 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
265
266 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
267 pgmPoolTracDerefGCPhysHint(pPool, pPage,
268 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
269 pGstPte->u & X86_PTE_PG_MASK);
270# endif
271 uShw.pPT->a[iShw].u = 0;
272 }
273 break;
274 }
275
276 /* page/2 sized */
277 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
278 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
279 {
280 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
281 if (uShw.pPTPae->a[iShw].n.u1Present)
282 {
283# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
284 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
285 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
286 pgmPoolTracDerefGCPhysHint(pPool, pPage,
287 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
288 pGstPte->u & X86_PTE_PG_MASK);
289# endif
290 uShw.pPTPae->a[iShw].u = 0;
291 }
292 }
293 break;
294
295 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
296 {
297 const unsigned iShw = off / sizeof(X86PTPAE);
298 if (uShw.pPTPae->a[iShw].n.u1Present)
299 {
300# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
301 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
302 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
303 pgmPoolTracDerefGCPhysHint(pPool, pPage,
304 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
305 pGstPte->u & X86_PTE_PAE_PG_MASK);
306# endif
307 uShw.pPTPae->a[iShw].u = 0;
308 }
309 break;
310 }
311
312 case PGMPOOLKIND_ROOT_32BIT_PD:
313 {
314 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
315 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
316 {
317 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
318 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
319 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
320 }
321 /* paranoia / a bit assumptive. */
322 else if ( pCpu
323 && (off & 4)
324 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
325 {
326 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
327 if ( iShw2 != iShw
328 && iShw2 < ELEMENTS(uShw.pPD->a)
329 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
330 {
331 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
332 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
333 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
334 }
335 }
336#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
337 if ( uShw.pPD->a[iShw].n.u1Present
338 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
339 {
340 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
341# ifdef IN_GC /* TLB load - we're pushing things a bit... */
342 ASMProbeReadByte(pvAddress);
343# endif
344 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
345 uShw.pPD->a[iShw].u = 0;
346 }
347#endif
348 break;
349 }
350
351 case PGMPOOLKIND_ROOT_PAE_PD:
352 {
353 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
354 for (unsigned i = 0; i < 2; i++, iShw++)
355 {
356 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
357 {
358 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
359 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
360 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
361 }
362 /* paranoia / a bit assumptive. */
363 else if ( pCpu
364 && (off & 4)
365 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
366 {
367 const unsigned iShw2 = iShw + 2;
368 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
369 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
370 {
371 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
372 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
373 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
374 }
375 }
376#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
377 if ( uShw.pPDPae->a[iShw].n.u1Present
378 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
379 {
380 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
381# ifdef IN_GC /* TLB load - we're pushing things a bit... */
382 ASMProbeReadByte(pvAddress);
383# endif
384 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
385 uShw.pPDPae->a[iShw].u = 0;
386 }
387#endif
388 }
389 break;
390 }
391
392 default:
393 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
394 }
395
396 /* next */
397 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
398 return;
399 pPage = &pPool->aPages[pPage->iMonitoredNext];
400 }
401}
402
403
404# ifndef IN_RING3
405/**
406 * Checks if an access could be a fork operation in progress.
407 *
408 * Meaning that the guest is setting up the parent process for Copy-On-Write.
409 *
410 * @returns true if it's likely that we're forking, otherwise false.
411 * @param pPool The pool.
412 * @param pCpu The disassembled instruction.
413 * @param offFault The access offset.
414 */
415DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
416{
417 /*
418 * i386 linux is using btr to clear X86_PTE_RW.
419 * The functions involved are (2.6.16 source inspection):
420 * clear_bit
421 * ptep_set_wrprotect
422 * copy_one_pte
423 * copy_pte_range
424 * copy_pmd_range
425 * copy_pud_range
426 * copy_page_range
427 * dup_mmap
428 * dup_mm
429 * copy_mm
430 * copy_process
431 * do_fork
432 */
433 if ( pCpu->pCurInstr->opcode == OP_BTR
434 && !(offFault & 4)
435 /** @todo Validate that the bit index is X86_PTE_RW. */
436 )
437 {
438 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
439 return true;
440 }
441 return false;
442}
443
444
445/**
446 * Determine whether the page is likely to have been reused.
447 *
448 * @returns true if we consider the page as being reused for a different purpose.
449 * @returns false if we consider it to still be a paging page.
450 * @param pPage The page in question.
451 * @param pCpu The disassembly info for the faulting instruction.
452 * @param pvFault The fault address.
453 *
454 * @remark The REP prefix check is left to the caller because of STOSD/W.
455 */
456DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
457{
458 switch (pCpu->pCurInstr->opcode)
459 {
460 case OP_PUSH:
461 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
462 return true;
463 case OP_PUSHF:
464 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
465 return true;
466 case OP_PUSHA:
467 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
468 return true;
469 case OP_FXSAVE:
470 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
471 return true;
472 }
473 if ( (pCpu->param1.flags & USE_REG_GEN32)
474 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
475 {
476 Log4(("pgmPoolMonitorIsReused: ESP\n"));
477 return true;
478 }
479
480 //if (pPage->fCR3Mix)
481 // return false;
482 return false;
483}
484
485
486/**
487 * Flushes the page being accessed.
488 *
489 * @returns VBox status code suitable for scheduling.
490 * @param pVM The VM handle.
491 * @param pPool The pool.
492 * @param pPage The pool page (head).
493 * @param pCpu The disassembly of the write instruction.
494 * @param pRegFrame The trap register frame.
495 * @param GCPhysFault The fault address as guest physical address.
496 * @param pvFault The fault address.
497 */
498static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
499 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
500{
501 /*
502 * First, do the flushing.
503 */
504 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
505
506 /*
507 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
508 */
509 uint32_t cbWritten;
510 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
511 if (VBOX_SUCCESS(rc2))
512 pRegFrame->eip += pCpu->opsize;
513 else if (rc2 == VERR_EM_INTERPRETER)
514 {
515#ifdef IN_GC
516 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
517 {
518 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04:%RGv, ignoring.\n",
519 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
520 rc = VINF_SUCCESS;
521 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
522 }
523 else
524#endif
525 {
526 rc = VINF_EM_RAW_EMULATE_INSTR;
527 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
528 }
529 }
530 else
531 rc = rc2;
532
533 /* See use in pgmPoolAccessHandlerSimple(). */
534 PGM_INVL_GUEST_TLBS();
535
536 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
537 return rc;
538
539}
540
541
542/**
543 * Handles the STOSD write accesses.
544 *
545 * @returns VBox status code suitable for scheduling.
546 * @param pVM The VM handle.
547 * @param pPool The pool.
548 * @param pPage The pool page (head).
549 * @param pCpu The disassembly of the write instruction.
550 * @param pRegFrame The trap register frame.
551 * @param GCPhysFault The fault address as guest physical address.
552 * @param pvFault The fault address.
553 */
554DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
555 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
556{
557 /*
558 * Increment the modification counter and insert it into the list
559 * of modified pages the first time.
560 */
561 if (!pPage->cModifications++)
562 pgmPoolMonitorModifiedInsert(pPool, pPage);
563
564 /*
565 * Execute REP STOSD.
566 *
567 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
568 * write situation, meaning that it's safe to write here.
569 */
570#ifdef IN_GC
571 uint32_t *pu32 = (uint32_t *)pvFault;
572#else
573 RTGCPTR pu32 = pvFault;
574#endif
575 while (pRegFrame->ecx)
576 {
577 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
578#ifdef IN_GC
579 *pu32++ = pRegFrame->eax;
580#else
581 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
582 pu32 += 4;
583#endif
584 GCPhysFault += 4;
585 pRegFrame->edi += 4;
586 pRegFrame->ecx--;
587 }
588 pRegFrame->eip += pCpu->opsize;
589
590 /* See use in pgmPoolAccessHandlerSimple(). */
591 PGM_INVL_GUEST_TLBS();
592
593 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
594 return VINF_SUCCESS;
595}
596
597
598/**
599 * Handles the simple write accesses.
600 *
601 * @returns VBox status code suitable for scheduling.
602 * @param pVM The VM handle.
603 * @param pPool The pool.
604 * @param pPage The pool page (head).
605 * @param pCpu The disassembly of the write instruction.
606 * @param pRegFrame The trap register frame.
607 * @param GCPhysFault The fault address as guest physical address.
608 * @param pvFault The fault address.
609 */
610DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
611 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
612{
613 /*
614 * Increment the modification counter and insert it into the list
615 * of modified pages the first time.
616 */
617 if (!pPage->cModifications++)
618 pgmPoolMonitorModifiedInsert(pPool, pPage);
619
620 /*
621 * Clear all the pages. ASSUMES that pvFault is readable.
622 */
623 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
624
625 /*
626 * Interpret the instruction.
627 */
628 uint32_t cb;
629 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
630 if (VBOX_SUCCESS(rc))
631 pRegFrame->eip += pCpu->opsize;
632 else if (rc == VERR_EM_INTERPRETER)
633 {
634# ifdef IN_GC
635 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
636 {
637 /* We're not able to handle this in ring-3, so fix the interpreter! */
638 /** @note Should be fine. There's no need to flush the whole thing. */
639#ifndef DEBUG_sandervl
640 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
641 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
642#endif
643 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
644 rc = pgmPoolMonitorChainFlush(pPool, pPage);
645 }
646 else
647# endif
648 {
649 rc = VINF_EM_RAW_EMULATE_INSTR;
650 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
651 }
652 }
653
654 /*
655 * Quick hack, with logging enabled we're getting stale
656 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
657 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
658 * have to be fixed to support this. But that'll have to wait till next week.
659 *
660 * An alternative is to keep track of the changed PTEs together with the
661 * GCPhys from the guest PT. This may prove expensive though.
662 *
663 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
664 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
665 */
666 PGM_INVL_GUEST_TLBS();
667
668 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
669 return rc;
670}
671
672
673/**
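/*
 * The #PF handler below selects one of the three workers above: ordinary writes
 * are interpreted one instruction at a time (pgmPoolAccessHandlerSimple), small
 * rep stosd clears are emulated inline (pgmPoolAccessHandlerSTOSD), and anything
 * else - too many modifications, suspected page reuse, fork-style write
 * protection, or other REP-prefixed writes - flushes the whole monitor chain
 * (pgmPoolAccessHandlerFlush).
 */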
674 * \#PF Handler callback for PT write accesses.
675 *
676 * @returns VBox status code (appropriate for GC return).
677 * @param pVM VM Handle.
678 * @param uErrorCode CPU Error code.
679 * @param pRegFrame Trap register frame.
680 * NULL on DMA and other non CPU access.
681 * @param pvFault The fault address (cr2).
682 * @param GCPhysFault The GC physical address corresponding to pvFault.
683 * @param pvUser User argument.
684 */
685DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
686{
687 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
688 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
689 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
690 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
691
692 /*
693 * We should ALWAYS have the list head as user parameter. This
694 * is because we use that page to record the changes.
695 */
696 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
697
698 /*
699 * Disassemble the faulting instruction.
700 */
701 DISCPUSTATE Cpu;
702 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
703 AssertRCReturn(rc, rc);
704
705 /*
706 * Check if it's worth dealing with.
707 */
708 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
709 || pPage->fCR3Mix)
710 && !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
711 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
712 {
713 /*
714 * Simple instructions, no REP prefix.
715 */
716 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
717 {
718 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
719 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
720 return rc;
721 }
722
723 /*
724 * Windows is frequently doing small memset() operations (netio test 4k+).
725 * We have to deal with these or we'll kill the cache and performance.
726 */
727 if ( Cpu.pCurInstr->opcode == OP_STOSWD
728 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
729 && pRegFrame->ecx <= 0x20
730 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
731 && !((uintptr_t)pvFault & 3)
732 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
733 && Cpu.mode == CPUMODE_32BIT
734 && Cpu.opmode == CPUMODE_32BIT
735 && Cpu.addrmode == CPUMODE_32BIT
736 && Cpu.prefix == PREFIX_REP
737 && !pRegFrame->eflags.Bits.u1DF
738 )
739 {
740 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
741 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
742 return rc;
743 }
744
745 /* REP prefix, don't bother. */
746 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
747 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
748 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
749 }
750
751 /*
752 * Not worth it, so flush it.
753 */
754 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
755 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
756 return rc;
757}
758
759# endif /* !IN_RING3 */
760#endif /* PGMPOOL_WITH_MONITORING */
761
762
763
764#ifdef PGMPOOL_WITH_CACHE
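/*
 * The GCPhys hash: pPool->aiHash[] holds the index of the first page in each
 * bucket and the pages within a bucket are chained through their iNext fields,
 * terminated by NIL_PGMPOOL_IDX. PGMPOOL_HASH(GCPhys) selects the bucket.
 */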
765/**
766 * Inserts a page into the GCPhys hash table.
767 *
768 * @param pPool The pool.
769 * @param pPage The page.
770 */
771DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
772{
773 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
774 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
775 pPage->iNext = pPool->aiHash[iHash];
776 pPool->aiHash[iHash] = pPage->idx;
777}
778
779
780/**
781 * Removes a page from the GCPhys hash table.
782 *
783 * @param pPool The pool.
784 * @param pPage The page.
785 */
786DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
787{
788 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
789 if (pPool->aiHash[iHash] == pPage->idx)
790 pPool->aiHash[iHash] = pPage->iNext;
791 else
792 {
793 uint16_t iPrev = pPool->aiHash[iHash];
794 for (;;)
795 {
796 const int16_t i = pPool->aPages[iPrev].iNext;
797 if (i == pPage->idx)
798 {
799 pPool->aPages[iPrev].iNext = pPage->iNext;
800 break;
801 }
802 if (i == NIL_PGMPOOL_IDX)
803 {
804 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
805 break;
806 }
807 iPrev = i;
808 }
809 }
810 pPage->iNext = NIL_PGMPOOL_IDX;
811}
812
813
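/*
 * Cached pages are also kept on an age list: iAgeHead points to the most
 * recently inserted/used page and iAgeTail to the least recently used one,
 * linked through iAgeNext / iAgePrev. Freeing a cache page therefore evicts
 * from the tail, skipping the caller's own user page.
 */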
814/**
815 * Frees up one cache page.
816 *
817 * @returns VBox status code.
818 * @retval VINF_SUCCESS on success.
819 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
820 * @param pPool The pool.
821 * @param iUser The user index.
822 */
823static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
824{
825 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
826 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
827
828 /*
829 * Select one page from the tail of the age list.
830 */
831 uint16_t iToFree = pPool->iAgeTail;
832 if (iToFree == iUser)
833 iToFree = pPool->aPages[iToFree].iAgePrev;
834/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
835 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
836 {
837 uint16_t i = pPool->aPages[iToFree].iAgePrev;
838 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
839 {
840 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
841 continue;
842 iToFree = i;
843 break;
844 }
845 }
846*/
847 Assert(iToFree != iUser);
848 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
849
850 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
851 if (rc == VINF_SUCCESS)
852 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
853 return rc;
854}
855
856
857/**
858 * Checks if a kind mismatch is really a page being reused
859 * or if it's just normal remappings.
860 *
861 * @returns true if reused and the cached page (enmKind1) should be flushed
862 * @returns false if not reused.
863 * @param enmKind1 The kind of the cached page.
864 * @param enmKind2 The kind of the requested page.
865 */
866static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
867{
868 switch (enmKind1)
869 {
870 /*
871 * Never reuse them. There is no remapping in non-paging mode.
872 */
873 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
874 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
875 return true;
876
877 /*
878 * It's prefectly fine to reuse these, except for PAE and non-paging stuff.
879 */
880 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
881 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
882 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
883 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
884 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
885 switch (enmKind2)
886 {
887 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
888 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
889 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
890 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
891 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
892 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
893 return true;
894 default:
895 return false;
896 }
897
898 /*
899 * It's prefectly fine to reuse these, except for PAE and non-paging stuff.
900 */
901 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
902 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
903 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
904 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
905 switch (enmKind2)
906 {
907 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
908 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
909 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
911 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
912 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
913 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
914 return true;
915 default:
916 return false;
917 }
918
919 /*
920 * These cannot be flushed, and it's common to reuse the PDs as PTs.
921 */
922 case PGMPOOLKIND_ROOT_32BIT_PD:
923 case PGMPOOLKIND_ROOT_PAE_PD:
924 case PGMPOOLKIND_ROOT_PDPTR:
925 case PGMPOOLKIND_ROOT_PML4:
926 return false;
927
928 default:
929 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
930 }
931}
932
933
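/*
 * A cache lookup walks the hash bucket for the requested guest physical
 * address, roughly like this (sketch of the loop below, using the same fields
 * and constants):
 *
 *     unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
 *     while (i != NIL_PGMPOOL_IDX && pPool->aPages[i].GCPhys != GCPhys)
 *         i = pPool->aPages[i].iNext;
 *
 * A hit with a matching kind just adds another user reference; a hit with a
 * mismatching kind either flushes the stale page (reuse) or is skipped
 * (legitimate remapping, see pgmPoolCacheReusedByKind above).
 */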
934/**
935 * Attempts to satisfy a pgmPoolAlloc request from the cache.
936 *
937 * @returns VBox status code.
938 * @retval VINF_PGM_CACHED_PAGE on success.
939 * @retval VERR_FILE_NOT_FOUND if not found.
940 * @param pPool The pool.
941 * @param GCPhys The GC physical address of the page we're gonna shadow.
942 * @param enmKind The kind of mapping.
943 * @param iUser The shadow page pool index of the user table.
944 * @param iUserTable The index into the user table (shadowed).
945 * @param ppPage Where to store the pointer to the page.
946 */
947static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
948{
949 /*
950 * Look up the GCPhys in the hash.
951 */
952 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
953 if (i != NIL_PGMPOOL_IDX)
954 {
955 do
956 {
957 PPGMPOOLPAGE pPage = &pPool->aPages[i];
958 if (pPage->GCPhys == GCPhys)
959 {
960 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
961 {
962 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
963 if (VBOX_SUCCESS(rc))
964 {
965 *ppPage = pPage;
966 STAM_COUNTER_INC(&pPool->StatCacheHits);
967 return VINF_PGM_CACHED_PAGE;
968 }
969 return rc;
970 }
971
972 /*
973 * The kind is different. In some cases we should now flush the page
974 * as it has been reused, but in most cases this is normal remapping
975 * of PDs as PT or big pages using the GCPhys field in a slightly
976 * different way than the other kinds.
977 */
978 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
979 {
980 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
981 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
982 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
983 break;
984 }
985 }
986
987 /* next */
988 i = pPage->iNext;
989 } while (i != NIL_PGMPOOL_IDX);
990 }
991
992 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
993 STAM_COUNTER_INC(&pPool->StatCacheMisses);
994 return VERR_FILE_NOT_FOUND;
995}
996
997
998/**
999 * Inserts a page into the cache.
1000 *
1001 * @param pPool The pool.
1002 * @param pPage The cached page.
1003 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1004 */
1005static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1006{
1007 /*
1008 * Insert into the GCPhys hash if the page is fit for that.
1009 */
1010 Assert(!pPage->fCached);
1011 if (fCanBeCached)
1012 {
1013 pPage->fCached = true;
1014 pgmPoolHashInsert(pPool, pPage);
1015 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1016 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1017 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1018 }
1019 else
1020 {
1021 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1022 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1023 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1024 }
1025
1026 /*
1027 * Insert at the head of the age list.
1028 */
1029 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1030 pPage->iAgeNext = pPool->iAgeHead;
1031 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1032 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1033 else
1034 pPool->iAgeTail = pPage->idx;
1035 pPool->iAgeHead = pPage->idx;
1036}
1037
1038
1039/**
1040 * Flushes a cached page.
1041 *
1042 * @param pPool The pool.
1043 * @param pPage The cached page.
1044 */
1045static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1046{
1047 /*
1048 * Remove the page from the hash.
1049 */
1050 if (pPage->fCached)
1051 {
1052 pPage->fCached = false;
1053 pgmPoolHashRemove(pPool, pPage);
1054 }
1055 else
1056 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1057
1058 /*
1059 * Remove it from the age list.
1060 */
1061 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1062 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1063 else
1064 pPool->iAgeTail = pPage->iAgePrev;
1065 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1066 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1067 else
1068 pPool->iAgeHead = pPage->iAgeNext;
1069 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1070 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1071}
1072#endif /* PGMPOOL_WITH_CACHE */
1073
1074
1075#ifdef PGMPOOL_WITH_MONITORING
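/*
 * Write monitoring piggybacks on the GCPhys hash above: before registering a
 * new physical access handler for a guest page, the pool looks for an already
 * monitored shadow page of the same guest page and, if one is found, simply
 * joins its monitor chain instead of registering a second handler.
 */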
1076/**
1077 * Looks for pages sharing the monitor.
1078 *
1079 * @returns Pointer to the head page.
1080 * @returns NULL if not found.
1081 * @param pPool The Pool
1082 * @param pNewPage The page which is going to be monitored.
1083 */
1084static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1085{
1086#ifdef PGMPOOL_WITH_CACHE
1087 /*
1088 * Look up the GCPhys in the hash.
1089 */
1090 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1091 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1092 if (i == NIL_PGMPOOL_IDX)
1093 return NULL;
1094 do
1095 {
1096 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1097 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1098 && pPage != pNewPage)
1099 {
1100 switch (pPage->enmKind)
1101 {
1102 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1103 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1104 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1105 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1106 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1107 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1108 case PGMPOOLKIND_ROOT_32BIT_PD:
1109 case PGMPOOLKIND_ROOT_PAE_PD:
1110 case PGMPOOLKIND_ROOT_PDPTR:
1111 case PGMPOOLKIND_ROOT_PML4:
1112 {
1113 /* find the head */
1114 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1115 {
1116 Assert(pPage->iMonitoredPrev != pPage->idx);
1117 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1118 }
1119 return pPage;
1120 }
1121
1122 /* ignore, no monitoring. */
1123 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1124 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1125 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1126 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1127 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1128 break;
1129 default:
1130 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1131 }
1132 }
1133
1134 /* next */
1135 i = pPage->iNext;
1136 } while (i != NIL_PGMPOOL_IDX);
1137#endif
1138 return NULL;
1139}
1140
1141/**
1142 * Enables write monitoring of a guest page.
1143 *
1144 * @returns VBox status code.
1145 * @retval VINF_SUCCESS on success.
1146 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1147 * @param pPool The pool.
1148 * @param pPage The cached page.
1149 */
1150static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1151{
1152 /*
1153 * Filter out the relevant kinds.
1154 */
1155 switch (pPage->enmKind)
1156 {
1157 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1158 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1159 break;
1160
1161 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1162 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1163 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1164 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1165 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1166 /* Nothing to monitor here. */
1167 return VINF_SUCCESS;
1168
1169 case PGMPOOLKIND_ROOT_32BIT_PD:
1170 case PGMPOOLKIND_ROOT_PAE_PD:
1171#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1172 break;
1173#endif
1174 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1175 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1176 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1177 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1178 case PGMPOOLKIND_ROOT_PDPTR:
1179 case PGMPOOLKIND_ROOT_PML4:
1180 default:
1181 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1182 }
1183
1184 /*
1185 * Install handler.
1186 */
1187 int rc;
1188 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1189 if (pPageHead)
1190 {
1191 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1192 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1193 pPage->iMonitoredPrev = pPageHead->idx;
1194 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1195 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1196 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1197 pPageHead->iMonitoredNext = pPage->idx;
1198 rc = VINF_SUCCESS;
1199 }
1200 else
1201 {
1202 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1203 PVM pVM = pPool->CTXSUFF(pVM);
1204 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1205 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1206 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1207 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
1208 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
1209 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
1210 pPool->pszAccessHandler);
1211 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1212 * the heap size should suffice. */
1213 AssertFatalRC(rc);
1214 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1215 rc = VERR_PGM_POOL_CLEARED;
1216 }
1217 pPage->fMonitored = true;
1218 return rc;
1219}
1220
1221
1222/**
1223 * Disables write monitoring of a guest page.
1224 *
1225 * @returns VBox status code.
1226 * @retval VINF_SUCCESS on success.
1227 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1228 * @param pPool The pool.
1229 * @param pPage The cached page.
1230 */
1231static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1232{
1233 /*
1234 * Filter out the relevant kinds.
1235 */
1236 switch (pPage->enmKind)
1237 {
1238 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1239 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1240 break;
1241
1242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1243 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1244 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1245 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1246 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1247 /* Nothing to monitor here. */
1248 return VINF_SUCCESS;
1249
1250 case PGMPOOLKIND_ROOT_32BIT_PD:
1251 case PGMPOOLKIND_ROOT_PAE_PD:
1252#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1253 break;
1254#endif
1255 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1256 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1257 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1258 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1259 case PGMPOOLKIND_ROOT_PDPTR:
1260 case PGMPOOLKIND_ROOT_PML4:
1261 default:
1262 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1263 }
1264
1265 /*
1266 * Remove the page from the monitored list or uninstall it if last.
1267 */
1268 const PVM pVM = pPool->CTXSUFF(pVM);
1269 int rc;
1270 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1271 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1272 {
1273 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1274 {
1275 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1276 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1277 pNewHead->fCR3Mix = pPage->fCR3Mix;
1278 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1279 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1280 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1281 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pNewHead),
1282 pPool->pszAccessHandler);
1283 AssertFatalRCSuccess(rc);
1284 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1285 }
1286 else
1287 {
1288 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1289 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1290 {
1291 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1292 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1293 }
1294 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1295 rc = VINF_SUCCESS;
1296 }
1297 }
1298 else
1299 {
1300 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1301 AssertFatalRC(rc);
1302 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1303 rc = VERR_PGM_POOL_CLEARED;
1304 }
1305 pPage->fMonitored = false;
1306
1307 /*
1308 * Remove it from the list of modified pages (if in it).
1309 */
1310 pgmPoolMonitorModifiedRemove(pPool, pPage);
1311
1312 return rc;
1313}
1314
1315
1316#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1317/**
1318 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1319 *
1320 * @param pPool The Pool.
1321 * @param pPage A page in the chain.
1322 * @param fCR3Mix The new fCR3Mix value.
1323 */
1324static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1325{
1326 /* current */
1327 pPage->fCR3Mix = fCR3Mix;
1328
1329 /* before */
1330 int16_t idx = pPage->iMonitoredPrev;
1331 while (idx != NIL_PGMPOOL_IDX)
1332 {
1333 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1334 idx = pPool->aPages[idx].iMonitoredPrev;
1335 }
1336
1337 /* after */
1338 idx = pPage->iMonitoredNext;
1339 while (idx != NIL_PGMPOOL_IDX)
1340 {
1341 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1342 idx = pPool->aPages[idx].iMonitoredNext;
1343 }
1344}
1345
1346
1347/**
1348 * Installs or modifies monitoring of a CR3 page (special).
1349 *
1350 * We're pretending the CR3 page is shadowed by the pool so we can use the
1351 * generic mechanisms in detecting chained monitoring. (This also gives us a
1352 * taste of what code changes are required to really pool CR3 shadow pages.)
1353 *
1354 * @returns VBox status code.
1355 * @param pPool The pool.
1356 * @param idxRoot The CR3 (root) page index.
1357 * @param GCPhysCR3 The (new) CR3 value.
1358 */
1359int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1360{
1361 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1362 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1363 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1364 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1365
1366 /*
1367 * The unlikely case where it already matches.
1368 */
1369 if (pPage->GCPhys == GCPhysCR3)
1370 {
1371 Assert(pPage->fMonitored);
1372 return VINF_SUCCESS;
1373 }
1374
1375 /*
1376 * Flush the current monitoring and remove it from the hash.
1377 */
1378 int rc = VINF_SUCCESS;
1379 if (pPage->fMonitored)
1380 {
1381 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1382 rc = pgmPoolMonitorFlush(pPool, pPage);
1383 if (rc == VERR_PGM_POOL_CLEARED)
1384 rc = VINF_SUCCESS;
1385 else
1386 AssertFatalRC(rc);
1387 pgmPoolHashRemove(pPool, pPage);
1388 }
1389
1390 /*
1391 * Monitor the page at the new location and insert it into the hash.
1392 */
1393 pPage->GCPhys = GCPhysCR3;
1394 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1395 if (rc2 != VERR_PGM_POOL_CLEARED)
1396 {
1397 AssertFatalRC(rc2);
1398 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1399 rc = rc2;
1400 }
1401 pgmPoolHashInsert(pPool, pPage);
1402 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1403 return rc;
1404}
1405
1406
1407/**
1408 * Removes the monitoring of a CR3 page (special).
1409 *
1410 * @returns VBox status code.
1411 * @param pPool The pool.
1412 * @param idxRoot The CR3 (root) page index.
1413 */
1414int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1415{
1416 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1417 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1418 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1419 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1420
1421 if (!pPage->fMonitored)
1422 return VINF_SUCCESS;
1423
1424 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1425 int rc = pgmPoolMonitorFlush(pPool, pPage);
1426 if (rc != VERR_PGM_POOL_CLEARED)
1427 AssertFatalRC(rc);
1428 else
1429 rc = VINF_SUCCESS;
1430 pgmPoolHashRemove(pPool, pPage);
1431 Assert(!pPage->fMonitored);
1432 pPage->GCPhys = NIL_RTGCPHYS;
1433 return rc;
1434}
1435#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1436
1437
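/*
 * Monitored pages that have actually been written to are additionally kept on
 * the modified page list (iModifiedHead, linked through iModifiedNext /
 * iModifiedPrev) together with a per-page cModifications counter; the access
 * handler uses that counter to decide when a page is being rewritten so often
 * that flushing it is cheaper than interpreting each write.
 */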
1438/**
1439 * Inserts the page into the list of modified pages.
1440 *
1441 * @param pPool The pool.
1442 * @param pPage The page.
1443 */
1444void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1445{
1446 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1447 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1448 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1449 && pPool->iModifiedHead != pPage->idx,
1450 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1451 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1452 pPool->iModifiedHead, pPool->cModifiedPages));
1453
1454 pPage->iModifiedNext = pPool->iModifiedHead;
1455 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1456 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1457 pPool->iModifiedHead = pPage->idx;
1458 pPool->cModifiedPages++;
1459#ifdef VBOX_WITH_STATISTICS
1460 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1461 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1462#endif
1463}
1464
1465
1466/**
1467 * Removes the page from the list of modified pages and resets the
1468 * modification counter.
1469 *
1470 * @param pPool The pool.
1471 * @param pPage The page which is believed to be in the list of modified pages.
1472 */
1473static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1474{
1475 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1476 if (pPool->iModifiedHead == pPage->idx)
1477 {
1478 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1479 pPool->iModifiedHead = pPage->iModifiedNext;
1480 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1481 {
1482 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1483 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1484 }
1485 pPool->cModifiedPages--;
1486 }
1487 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1488 {
1489 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1490 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1491 {
1492 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1493 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1494 }
1495 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1496 pPool->cModifiedPages--;
1497 }
1498 else
1499 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1500 pPage->cModifications = 0;
1501}
1502
1503
1504/**
1505 * Zaps the list of modified pages, resetting their modification counters in the process.
1506 *
1507 * @param pVM The VM handle.
1508 */
1509void pgmPoolMonitorModifiedClearAll(PVM pVM)
1510{
1511 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1512 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1513
1514 unsigned cPages = 0; NOREF(cPages);
1515 uint16_t idx = pPool->iModifiedHead;
1516 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1517 while (idx != NIL_PGMPOOL_IDX)
1518 {
1519 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1520 idx = pPage->iModifiedNext;
1521 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1522 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1523 pPage->cModifications = 0;
1524 Assert(++cPages);
1525 }
1526 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1527 pPool->cModifiedPages = 0;
1528}
1529
1530
1531/**
1532 * Clear all shadow pages and clear all modification counters.
1533 *
1534 * @param pVM The VM handle.
1535 * @remark Should only be used when monitoring is available, thus placed in
1536 * the PGMPOOL_WITH_MONITORING #ifdef.
1537 */
1538void pgmPoolClearAll(PVM pVM)
1539{
1540 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1541 STAM_PROFILE_START(&pPool->StatClearAll, c);
1542 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1543
1544 /*
1545 * Iterate all the pages until we've encountered all that are in use.
1546 * This is a simple but not quite optimal solution.
1547 */
1548 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1549 unsigned cLeft = pPool->cUsedPages;
1550 unsigned iPage = pPool->cCurPages;
1551 while (--iPage >= PGMPOOL_IDX_FIRST)
1552 {
1553 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1554 if (pPage->GCPhys != NIL_RTGCPHYS)
1555 {
1556 switch (pPage->enmKind)
1557 {
1558 /*
1559 * We only care about shadow page tables.
1560 */
1561 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1562 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1563 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1564 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1565 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1566 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1567 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1568 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1569 {
1570#ifdef PGMPOOL_WITH_USER_TRACKING
1571 if (pPage->cPresent)
1572#endif
1573 {
1574 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1575 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1576 ASMMemZeroPage(pvShw);
1577 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1578#ifdef PGMPOOL_WITH_USER_TRACKING
1579 pPage->cPresent = 0;
1580 pPage->iFirstPresent = ~0;
1581#endif
1582 }
1583 }
1584 /* fall thru */
1585
1586 default:
1587 Assert(!pPage->cModifications || ++cModifiedPages);
1588 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1589 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1590 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1591 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1592 pPage->cModifications = 0;
1593 break;
1594
1595 }
1596 if (!--cLeft)
1597 break;
1598 }
1599 }
1600
1601 /* sweep the special pages too. */
1602 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1603 {
1604 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1605 if (pPage->GCPhys != NIL_RTGCPHYS)
1606 {
1607 Assert(!pPage->cModifications || ++cModifiedPages);
1608 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1609 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1610 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1611 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1612 pPage->cModifications = 0;
1613 }
1614 }
1615
1616 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1617 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1618 pPool->cModifiedPages = 0;
1619
1620#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1621 /*
1622 * Clear all the GCPhys links and rebuild the phys ext free list.
1623 */
1624 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1625 pRam;
1626 pRam = pRam->CTXSUFF(pNext))
1627 {
1628 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1629 while (iPage-- > 0)
1630 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
1631 }
1632
1633 pPool->iPhysExtFreeHead = 0;
1634 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1635 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1636 for (unsigned i = 0; i < cMaxPhysExts; i++)
1637 {
1638 paPhysExts[i].iNext = i + 1;
1639 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1640 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1641 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1642 }
1643 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1644#endif
1645
1646
1647 pPool->cPresent = 0;
1648 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1649}
1650#endif /* PGMPOOL_WITH_MONITORING */
1651
1652
1653#ifdef PGMPOOL_WITH_USER_TRACKING
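/*
 * User tracking records which shadow table entries reference each pool page:
 * every page has a chain of PGMPOOLUSER records (iUserHead, linked through
 * iNext), each storing the referencing table's pool index (iUser) and the
 * entry index within it (iUserTable), so the pool can locate and update those
 * entries when the page has to be freed or flushed.
 */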
1654/**
1655 * Frees up at least one user entry.
1656 *
1657 * @returns VBox status code.
1658 * @retval VINF_SUCCESS if successfully added.
1659 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1660 * @param pPool The pool.
1661 * @param iUser The user index.
1662 */
1663static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1664{
1665 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1666#ifdef PGMPOOL_WITH_CACHE
1667 /*
1668 * Just free cached pages in a braindead fashion.
1669 */
1670 /** @todo walk the age list backwards and free the first with usage. */
1671 int rc = VINF_SUCCESS;
1672 do
1673 {
1674 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1675 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1676 rc = rc2;
1677 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1678 return rc;
1679#else
1680 /*
1681 * Lazy approach.
1682 */
1683 pgmPoolFlushAllInt(pPool);
1684 return VERR_PGM_POOL_FLUSHED;
1685#endif
1686}
1687
1688
1689/**
1690 * Inserts a page into the cache.
1691 *
1692 * This will create a user node for the page, insert it into the GCPhys
1693 * hash, and insert it into the age list.
1694 *
1695 * @returns VBox status code.
1696 * @retval VINF_SUCCESS if successfully added.
1697 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1698 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1699 * @param pPool The pool.
1700 * @param pPage The cached page.
1701 * @param GCPhys The GC physical address of the page we're gonna shadow.
1702 * @param iUser The user index.
1703 * @param iUserTable The user table index.
1704 */
1705DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1706{
1707 int rc = VINF_SUCCESS;
1708 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1709
1710 /*
1711 * Find a free user node.
1712 */
1713 uint16_t i = pPool->iUserFreeHead;
1714 if (i == NIL_PGMPOOL_USER_INDEX)
1715 {
1716 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1717 if (VBOX_FAILURE(rc))
1718 return rc;
1719 i = pPool->iUserFreeHead;
1720 }
1721
1722 /*
1723 * Unlink the user node from the free list,
1724 * initialize and insert it into the user list.
1725 */
1726 pPool->iUserFreeHead = pUser[i].iNext;
1727 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1728 pUser[i].iUser = iUser;
1729 pUser[i].iUserTable = iUserTable;
1730 pPage->iUserHead = i;
1731
1732 /*
1733 * Insert into cache and enable monitoring of the guest page if enabled.
1734 *
1735 * Until we implement caching of all levels, including the CR3 one, we'll
1736 * have to make sure we don't try to monitor & cache any recursive reuse of
1737 * a monitored CR3 page. Because all Windows versions are doing this we'll
1738 * have to be able to do combined access monitoring, CR3 + PT and
1739 * PD + PT (guest PAE).
1740 *
1741 * Update:
1742 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1743 */
1744#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1745# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1746 const bool fCanBeMonitored = true;
1747# else
1748 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1749 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1750 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1751# endif
1752# ifdef PGMPOOL_WITH_CACHE
1753 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1754# endif
1755 if (fCanBeMonitored)
1756 {
1757# ifdef PGMPOOL_WITH_MONITORING
1758 rc = pgmPoolMonitorInsert(pPool, pPage);
1759 if (rc == VERR_PGM_POOL_CLEARED)
1760 {
1761 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1762# ifndef PGMPOOL_WITH_CACHE
1763 pgmPoolMonitorFlush(pPool, pPage);
1764 rc = VERR_PGM_POOL_FLUSHED;
1765# endif
1766 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1767 pUser[i].iNext = pPool->iUserFreeHead;
1768 pUser[i].iUser = NIL_PGMPOOL_IDX;
1769 pPool->iUserFreeHead = i;
1770 }
1771 }
1772# endif
1773#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
1774 return rc;
1775}
1776
1777
1778# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1779/**
1780 * Adds a user reference to a page.
1781 *
1782 * This will add a user record for the page and update the cache
1783 * replacement stats, moving the page to the head of the age list.
1784 *
1785 * @returns VBox status code.
1786 * @retval VINF_SUCCESS if successfully added.
1787 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1788 * @param pPool The pool.
1789 * @param pPage The cached page.
1790 * @param iUser The user index.
1791 * @param iUserTable The user table.
1792 */
1793static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1794{
1795 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1796
1797# ifdef VBOX_STRICT
1798 /*
1799 * Check that the entry doesn't already exist.
1800 */
1801 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1802 {
1803 uint16_t i = pPage->iUserHead;
1804 do
1805 {
1806 Assert(i < pPool->cMaxUsers);
1807 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1808 i = paUsers[i].iNext;
1809 } while (i != NIL_PGMPOOL_USER_INDEX);
1810 }
1811# endif
1812
1813 /*
1814 * Allocate a user node.
1815 */
1816 uint16_t i = pPool->iUserFreeHead;
1817 if (i == NIL_PGMPOOL_USER_INDEX)
1818 {
1819 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1820 if (VBOX_FAILURE(rc))
1821 return rc;
1822 i = pPool->iUserFreeHead;
1823 }
1824 pPool->iUserFreeHead = paUsers[i].iNext;
1825
1826 /*
1827 * Initialize the user node and insert it.
1828 */
1829 paUsers[i].iNext = pPage->iUserHead;
1830 paUsers[i].iUser = iUser;
1831 paUsers[i].iUserTable = iUserTable;
1832 pPage->iUserHead = i;
1833
1834# ifdef PGMPOOL_WITH_CACHE
1835 /*
1836 * Tell the cache to update its replacement stats for this page.
1837 */
1838 pgmPoolCacheUsed(pPool, pPage);
1839# endif
1840 return VINF_SUCCESS;
1841}
1842# endif /* PGMPOOL_WITH_CACHE */
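
/*
 * Editor's illustrative sketch (not part of the original file): a minimal,
 * standalone model of the user-record handling performed by
 * pgmPoolTrackInsert() and pgmPoolTrackAddUser() above. Records live in a
 * fixed array and are chained through 16-bit indices; allocation pops the
 * free-list head and the record is then pushed onto the shadow page's user
 * chain. All names and sizes below are hypothetical.
 */
#include <stdint.h>

#define SKETCH_NIL_INDEX   UINT16_MAX

typedef struct SKETCHUSER
{
    uint16_t iNext;        /* next record in the free list or in a page's user chain */
    uint16_t iUser;        /* index of the table (page) referencing the shadow page */
    uint16_t iUserTable;   /* entry within that table which points at the shadow page */
} SKETCHUSER;

typedef struct SKETCHUSERPOOL
{
    SKETCHUSER aRecords[128];
    uint16_t   iFreeHead;  /* head of the chain of unused records */
} SKETCHUSERPOOL;

/*
 * Records that the table identified by iUser/iUserTable references the shadow
 * page whose user chain head is *piUserHead. Returns 0 on success, -1 if no
 * free record is available (the real code then frees a cached page and retries).
 */
static int sketchUserAdd(SKETCHUSERPOOL *pPool, uint16_t *piUserHead, uint16_t iUser, uint16_t iUserTable)
{
    uint16_t i = pPool->iFreeHead;
    if (i == SKETCH_NIL_INDEX)
        return -1;
    pPool->iFreeHead = pPool->aRecords[i].iNext;    /* unlink from the free list */

    pPool->aRecords[i].iUser      = iUser;
    pPool->aRecords[i].iUserTable = iUserTable;
    pPool->aRecords[i].iNext      = *piUserHead;    /* push onto the page's user chain */
    *piUserHead = i;
    return 0;
}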
1843
1844
1845/**
1846 * Frees a user record associated with a page.
1847 *
1848 * This does not clear the entry in the user table, it simply returns the
1849 * user record to the chain of free records.
1850 *
1851 * @param pPool The pool.
1852 * @param pPage The shadow page.
1853 * @param iUser The shadow page pool index of the user table.
1854 * @param iUserTable The index into the user table (shadowed).
1855 */
1856static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1857{
1858 /*
1859 * Unlink and free the specified user entry.
1860 */
1861 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1862
1863 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1864 uint16_t i = pPage->iUserHead;
1865 if ( i != NIL_PGMPOOL_USER_INDEX
1866 && paUsers[i].iUser == iUser
1867 && paUsers[i].iUserTable == iUserTable)
1868 {
1869 pPage->iUserHead = paUsers[i].iNext;
1870
1871 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1872 paUsers[i].iNext = pPool->iUserFreeHead;
1873 pPool->iUserFreeHead = i;
1874 return;
1875 }
1876
1877 /* General: Linear search. */
1878 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1879 while (i != NIL_PGMPOOL_USER_INDEX)
1880 {
1881 if ( paUsers[i].iUser == iUser
1882 && paUsers[i].iUserTable == iUserTable)
1883 {
1884 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1885 paUsers[iPrev].iNext = paUsers[i].iNext;
1886 else
1887 pPage->iUserHead = paUsers[i].iNext;
1888
1889 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1890 paUsers[i].iNext = pPool->iUserFreeHead;
1891 pPool->iUserFreeHead = i;
1892 return;
1893 }
1894 iPrev = i;
1895 i = paUsers[i].iNext;
1896 }
1897
1898 /* Fatal: didn't find it */
1899 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1900 iUser, iUserTable, pPage->GCPhys));
1901}
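
/*
 * Editor's illustrative sketch (not part of the original file): the removal
 * counterpart of the user-record chain manipulated above. A predecessor-
 * tracking linear scan unlinks the record matching iUser/iUserTable (the
 * original keeps a separate fast path for the head record, the common
 * single-user case) and returns it to the free list. Names are hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

#define SKETCH_NIL_INDEX   UINT16_MAX

typedef struct SKETCHUSER
{
    uint16_t iNext;
    uint16_t iUser;
    uint16_t iUserTable;
} SKETCHUSER;

/* Returns true if a matching record was found, unlinked and freed. */
static bool sketchUserRemove(SKETCHUSER *paRecords, uint16_t *piFreeHead, uint16_t *piUserHead,
                             uint16_t iUser, uint16_t iUserTable)
{
    uint16_t iPrev = SKETCH_NIL_INDEX;
    for (uint16_t i = *piUserHead; i != SKETCH_NIL_INDEX; iPrev = i, i = paRecords[i].iNext)
        if (   paRecords[i].iUser == iUser
            && paRecords[i].iUserTable == iUserTable)
        {
            if (iPrev == SKETCH_NIL_INDEX)              /* unlink: fix up the head... */
                *piUserHead = paRecords[i].iNext;
            else                                        /* ...or the predecessor.     */
                paRecords[iPrev].iNext = paRecords[i].iNext;

            paRecords[i].iNext = *piFreeHead;           /* return it to the free list */
            *piFreeHead = i;
            return true;
        }
    return false;
}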
1902
1903
1904/**
1905 * Gets the entry size of a shadow table.
1906 *
1907 * @param enmKind
1908 * The kind of page.
1909 *
1910 * @returns The size of the entry in bytes. That is, 4 or 8.
1911 * @returns If the kind is not for a table, an assertion is raised and 0 is
1912 * returned.
1913 */
1914DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1915{
1916 switch (enmKind)
1917 {
1918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1919 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1920 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1921 case PGMPOOLKIND_ROOT_32BIT_PD:
1922 return 4;
1923
1924 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1926 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1927 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1928 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1929 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1930 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1931 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1932 case PGMPOOLKIND_ROOT_PAE_PD:
1933 case PGMPOOLKIND_ROOT_PDPTR:
1934 case PGMPOOLKIND_ROOT_PML4:
1935 return 8;
1936
1937 default:
1938 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1939 }
1940}
1941
1942
1943/**
1944 * Gets the entry size of a guest table.
1945 *
1946 * @param enmKind
1947 * The kind of page.
1948 *
1949 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1950 * @returns If the kind is not for a table, an assertion is raised and 0 is
1951 * returned.
1952 */
1953DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1954{
1955 switch (enmKind)
1956 {
1957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1958 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1959 case PGMPOOLKIND_ROOT_32BIT_PD:
1960 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1961 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1962 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1963 return 4;
1964
1965 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1966 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1967 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1968 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1969 case PGMPOOLKIND_ROOT_PAE_PD:
1970 case PGMPOOLKIND_ROOT_PDPTR:
1971 case PGMPOOLKIND_ROOT_PML4:
1972 return 8;
1973
1974 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1975 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1976 /** @todo can we return 0? (nobody is calling this...) */
1977 return 0;
1978
1979 default:
1980 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1981 }
1982}
1983
1984
1985#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1986/**
1987 * Scans one shadow page table for mappings of a physical page.
1988 *
1989 * @param pVM The VM handle.
1990 * @param pHCPhys The aHCPhys ramrange entry in question.
1991 * @param iShw The shadow page table.
1992 * @param cRefs The number of references made in that PT.
1993 */
1994static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
1995{
1996 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
1997 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1998
1999 /*
2000 * Assert sanity.
2001 */
2002 Assert(cRefs == 1);
2003 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2004 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2005
2006 /*
2007 * Then, clear the actual mappings to the page in the shadow PT.
2008 */
2009 switch (pPage->enmKind)
2010 {
2011 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2012 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2013 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2014 {
2015 const uint32_t u32 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2016 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2017 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2018 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2019 {
2020 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2021 pPT->a[i].u = 0;
2022 cRefs--;
2023 if (!cRefs)
2024 return;
2025 }
2026#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2027 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2028 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2029 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2030 {
2031 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2032 pPT->a[i].u = 0;
2033 }
2034#endif
2035 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2036 break;
2037 }
2038
2039 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2040 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2041 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2042 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2043 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2044 {
2045 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2046 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2047 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2048 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2049 {
2050 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2051 pPT->a[i].u = 0;
2052 cRefs--;
2053 if (!cRefs)
2054 return;
2055 }
2056#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2057 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2058 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2059 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2060 {
2061 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2062 pPT->a[i].u = 0;
2063 }
2064#endif
2065 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2066 break;
2067 }
2068
2069 default:
2070 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2071 }
2072}
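
/*
 * Editor's illustrative sketch (not part of the original file): a standalone
 * model of the 32-bit case handled above. Given the host frame of a guest
 * page, the shadow page table is scanned for present entries mapping that
 * frame; each hit is cleared and the expected reference count is counted down
 * so the scan can stop early. The PTE constants follow the architectural
 * 32-bit layout; everything else is hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

#define SKETCH_PTE_P        UINT32_C(0x00000001)    /* present bit */
#define SKETCH_PTE_PG_MASK  UINT32_C(0xfffff000)    /* page frame mask */
#define SKETCH_PT_ENTRIES   1024

/* Clears every PTE mapping HCPhys; returns the number of references left unresolved. */
static unsigned sketchClearMappings(uint32_t *paPTEs, uint32_t HCPhys, unsigned cRefs)
{
    const uint32_t uMatch = (HCPhys & SKETCH_PTE_PG_MASK) | SKETCH_PTE_P;
    for (size_t i = 0; i < SKETCH_PT_ENTRIES && cRefs > 0; i++)
        if ((paPTEs[i] & (SKETCH_PTE_PG_MASK | SKETCH_PTE_P)) == uMatch)
        {
            paPTEs[i] = 0;     /* drop the mapping */
            cRefs--;
        }
    return cRefs;              /* non-zero corresponds to the fatal assertion above */
}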
2073
2074
2075/**
2076 * Scans one shadow page table for mappings of a physical page.
2077 *
2078 * @param pVM The VM handle.
2079 * @param pHCPhys The aHCPhys ramrange entry in question.
2080 * @param iShw The shadow page table.
2081 * @param cRefs The number of references made in that PT.
2082 */
2083void pgmPoolTrackFlushGCPhysPT(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
2084{
2085 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2086 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
2087 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2088 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, iShw, cRefs);
2089 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2090 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2091}
2092
2093
2094/**
2095 * Flushes a list of shadow page tables mapping the same physical page.
2096 *
2097 * @param pVM The VM handle.
2098 * @param pHCPhys The aHCPhys ramrange entry in question.
2099 * @param iPhysExt The physical cross reference extent list to flush.
2100 */
2101void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iPhysExt)
2102{
2103 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2104 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2105 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pHCPhys=%p:{%RHp} iPhysExt=%d\n", pHCPhys, *pHCPhys, iPhysExt));
2106
2107 const uint16_t iPhysExtStart = iPhysExt;
2108 PPGMPOOLPHYSEXT pPhysExt;
2109 do
2110 {
2111 Assert(iPhysExt < pPool->cMaxPhysExts);
2112 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2113 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2114 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2115 {
2116 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, pPhysExt->aidx[i], 1);
2117 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2118 }
2119
2120 /* next */
2121 iPhysExt = pPhysExt->iNext;
2122 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2123
2124 /* insert the list into the free list and clear the ram range entry. */
2125 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2126 pPool->iPhysExtFreeHead = iPhysExtStart;
2127 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2128
2129 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2130}
2131#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2132
2133
2134/**
2135 * Scans all shadow page tables for mappings of a physical page.
2136 *
2137 * This may be slow, but it's most likely more efficient than cleaning
2138 * out the entire page pool / cache.
2139 *
2140 * @returns VBox status code.
2141 * @retval VINF_SUCCESS if all references have been successfully cleared.
2142 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2143 * a page pool cleaning.
2144 *
2145 * @param pVM The VM handle.
2146 * @param pHCPhys The aHCPhys ramrange entry in question.
2147 */
2148int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PRTHCPHYS pHCPhys)
2149{
2150 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2151 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2152 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d *pHCPhys=%RHp\n",
2153 pPool->cUsedPages, pPool->cPresent, *pHCPhys));
2154
2155#if 1
2156 /*
2157 * There is a limit to what makes sense.
2158 */
2159 if (pPool->cPresent > 1024)
2160 {
2161 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2162 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2163 return VINF_PGM_GCPHYS_ALIASED;
2164 }
2165#endif
2166
2167 /*
2168 * Iterate all the pages until we've encountered all that are in use.
2169 * This is a simple but not quite optimal solution.
2170 */
2171 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2172 const uint32_t u32 = u64;
2173 unsigned cLeft = pPool->cUsedPages;
2174 unsigned iPage = pPool->cCurPages;
2175 while (--iPage >= PGMPOOL_IDX_FIRST)
2176 {
2177 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2178 if (pPage->GCPhys != NIL_RTGCPHYS)
2179 {
2180 switch (pPage->enmKind)
2181 {
2182 /*
2183 * We only care about shadow page tables.
2184 */
2185 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2186 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2187 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2188 {
2189 unsigned cPresent = pPage->cPresent;
2190 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2191 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2192 if (pPT->a[i].n.u1Present)
2193 {
2194 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2195 {
2196 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2197 pPT->a[i].u = 0;
2198 }
2199 if (!--cPresent)
2200 break;
2201 }
2202 break;
2203 }
2204
2205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2207 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2208 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2209 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2210 {
2211 unsigned cPresent = pPage->cPresent;
2212 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2213 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2214 if (pPT->a[i].n.u1Present)
2215 {
2216 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2217 {
2218 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2219 pPT->a[i].u = 0;
2220 }
2221 if (!--cPresent)
2222 break;
2223 }
2224 break;
2225 }
2226 }
2227 if (!--cLeft)
2228 break;
2229 }
2230 }
2231
2232 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2233 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2234 return VINF_SUCCESS;
2235}
2236
2237
2238/**
2239 * Clears the user entry in a user table.
2240 *
2241 * This is used to remove all references to a page when flushing it.
2242 */
2243static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2244{
2245 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2246 Assert(pUser->iUser < pPool->cCurPages);
2247
2248 /*
2249 * Map the user page.
2250 */
2251 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2252 union
2253 {
2254 uint64_t *pau64;
2255 uint32_t *pau32;
2256 } u;
2257 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2258
2259#ifdef VBOX_STRICT
2260 /*
2261 * Some sanity checks.
2262 */
2263 switch (pUserPage->enmKind)
2264 {
2265 case PGMPOOLKIND_ROOT_32BIT_PD:
2266 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2267 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2268 break;
2269 case PGMPOOLKIND_ROOT_PAE_PD:
2270 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2271 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2272 break;
2273 case PGMPOOLKIND_ROOT_PDPTR:
2274 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2275 Assert(pUser->iUserTable < 4);
2276 break;
2277 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2278 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2279 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2280 break;
2281 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2282 case PGMPOOLKIND_ROOT_PML4:
2283 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2284 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2285 break;
2286 default:
2287 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2288 break;
2289 }
2290#endif /* VBOX_STRICT */
2291
2292 /*
2293 * Clear the entry in the user page.
2294 */
2295 switch (pUserPage->enmKind)
2296 {
2297 /* 32-bit entries */
2298 case PGMPOOLKIND_ROOT_32BIT_PD:
2299 u.pau32[pUser->iUserTable] = 0;
2300 break;
2301
2302 /* 64-bit entries */
2303 case PGMPOOLKIND_ROOT_PAE_PD:
2304 case PGMPOOLKIND_ROOT_PDPTR:
2305 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2306 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2307 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2308 case PGMPOOLKIND_ROOT_PML4:
2309 u.pau64[pUser->iUserTable] = 0;
2310 break;
2311
2312 default:
2313 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2314 }
2315}
2316
2317
2318/**
2319 * Clears all users of a page.
2320 */
2321static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2322{
2323 /*
2324 * Free all the user records.
2325 */
2326 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2327 uint16_t i = pPage->iUserHead;
2328 while (i != NIL_PGMPOOL_USER_INDEX)
2329 {
2330 /* Clear the entry in the user table. */
2331 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2332
2333 /* Free it. */
2334 const uint16_t iNext = paUsers[i].iNext;
2335 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2336 paUsers[i].iNext = pPool->iUserFreeHead;
2337 pPool->iUserFreeHead = i;
2338
2339 /* Next. */
2340 i = iNext;
2341 }
2342 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2343}
2344
2345
2346#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2347/**
2348 * Allocates a new physical cross reference extent.
2349 *
2350 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2351 * @param pVM The VM handle.
2352 * @param piPhysExt Where to store the phys ext index.
2353 */
2354PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2355{
2356 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2357 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2358 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2359 {
2360 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2361 return NULL;
2362 }
2363 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2364 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2365 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2366 *piPhysExt = iPhysExt;
2367 return pPhysExt;
2368}
2369
2370
2371/**
2372 * Frees a physical cross reference extent.
2373 *
2374 * @param pVM The VM handle.
2375 * @param iPhysExt The extent to free.
2376 */
2377void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2378{
2379 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2380 Assert(iPhysExt < pPool->cMaxPhysExts);
2381 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2382 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2383 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2384 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2385 pPool->iPhysExtFreeHead = iPhysExt;
2386}
2387
2388
2389/**
2390 * Frees a list of physical cross reference extents.
2391 *
2392 * @param pVM The VM handle.
2393 * @param iPhysExt The index of the first extent in the list to free.
2394 */
2395void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2396{
2397 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2398
2399 const uint16_t iPhysExtStart = iPhysExt;
2400 PPGMPOOLPHYSEXT pPhysExt;
2401 do
2402 {
2403 Assert(iPhysExt < pPool->cMaxPhysExts);
2404 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2405 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2406 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2407
2408 /* next */
2409 iPhysExt = pPhysExt->iNext;
2410 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2411
2412 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2413 pPool->iPhysExtFreeHead = iPhysExtStart;
2414}
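
/*
 * Editor's illustrative sketch (not part of the original file): the
 * list-recycling pattern used by pgmPoolTrackPhysExtFreeList() above. The
 * chain is walked once to scrub each node and, because the nodes are already
 * linked together, the whole chain is then spliced onto the free list in one
 * step by pointing the tail at the old free head. Names are hypothetical.
 */
#include <stdint.h>

#define SKETCH_NIL_INDEX  UINT16_MAX

typedef struct SKETCHEXT
{
    uint16_t iNext;
    uint16_t aidx[3];          /* payload: shadow page table indices */
} SKETCHEXT;

static void sketchFreeChain(SKETCHEXT *paExts, uint16_t *piFreeHead, uint16_t iHead)
{
    SKETCHEXT *pLast;
    uint16_t   i = iHead;
    do
    {
        pLast = &paExts[i];
        pLast->aidx[0] = pLast->aidx[1] = pLast->aidx[2] = SKETCH_NIL_INDEX;   /* scrub */
        i = pLast->iNext;
    } while (i != SKETCH_NIL_INDEX);

    pLast->iNext = *piFreeHead;    /* splice the whole chain onto the free list */
    *piFreeHead = iHead;
}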
2415
2416/**
2417 * Insert a reference into a list of physical cross reference extents.
2418 *
2419 * @returns The new ram range flags (top 16-bits).
2420 *
2421 * @param pVM The VM handle.
2422 * @param iPhysExt The physical extent index of the list head.
2423 * @param iShwPT The shadow page table index.
2424 *
2425 */
2426static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2427{
2428 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2429 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2430
2431 /* special common case. */
2432 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2433 {
2434 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2435 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2436 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2437 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2438 }
2439
2440 /* general treatment. */
2441 const uint16_t iPhysExtStart = iPhysExt;
2442 unsigned cMax = 15;
2443 for (;;)
2444 {
2445 Assert(iPhysExt < pPool->cMaxPhysExts);
2446 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2447 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2448 {
2449 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2450 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2451 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2452 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2453 }
2454 if (!--cMax)
2455 {
2456 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2457 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2458 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2459 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2460 }

        /* Advance to the next extent in the list; if there is none left, go add a new one below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
2461 }
2462
2463 /* add another extent to the list. */
2464 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2465 if (!pNew)
2466 {
2467 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2468 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2469 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2470 }
2471 pNew->iNext = iPhysExtStart;
2472 pNew->aidx[0] = iShwPT;
2473 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2474 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2475}
2476
2477
2478/**
2479 * Add a reference to a guest physical page where extents are in use.
2480 *
2481 * @returns The new ram range flags (top 16-bits).
2482 *
2483 * @param pVM The VM handle.
2484 * @param u16 The ram range flags (top 16-bits).
2485 * @param iShwPT The shadow page table index.
2486 */
2487uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2488{
2489 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2490 {
2491 /*
2492 * Convert to extent list.
2493 */
2494 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2495 uint16_t iPhysExt;
2496 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2497 if (pPhysExt)
2498 {
2499 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2500 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2501 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2502 pPhysExt->aidx[1] = iShwPT;
2503 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2504 }
2505 else
2506 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2507 }
2508 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2509 {
2510 /*
2511 * Insert into the extent list.
2512 */
2513 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2514 }
2515 else
2516 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2517 return u16;
2518}
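
/*
 * Editor's illustrative sketch (not part of the original file): a standalone
 * model of the packed tracking word manipulated by pgmPoolTrackPhysExtAddref()
 * above. A small cRefs field either counts direct references (with the index
 * field naming the single referencing shadow PT) or holds a sentinel meaning
 * "the index field is the head of an extent list"; a second sentinel marks
 * pages with too many references to track. Only the single-extent case is
 * modelled here; the real code walks the chain and can append further extents.
 * The bit layout used (12-bit index, 4-bit cRefs) is purely hypothetical --
 * the real MM_RAM_FLAGS_* values are defined elsewhere.
 */
#include <stdint.h>

#define SKETCH_IDX_MASK        UINT16_C(0x0fff)
#define SKETCH_CREFS_SHIFT     12
#define SKETCH_CREFS_PHYSEXT   UINT16_C(0xf)     /* sentinel: index is an extent-list head */
#define SKETCH_IDX_OVERFLOWED  UINT16_C(0xffe)   /* sentinel: tracking has been given up */
#define SKETCH_NIL_INDEX       UINT16_MAX

typedef struct SKETCHEXT
{
    uint16_t iNext;
    uint16_t aidx[3];
} SKETCHEXT;

static uint16_t sketchPack(uint16_t uCRefs, uint16_t idx)
{
    return (uint16_t)((uCRefs << SKETCH_CREFS_SHIFT) | (idx & SKETCH_IDX_MASK));
}

/* Adds a reference from shadow PT iShwPT to the page whose tracking word is u16. */
static uint16_t sketchTrackAddref(SKETCHEXT *paExts, uint16_t *piExtFreeHead, uint16_t u16, uint16_t iShwPT)
{
    const uint16_t cRefs = (uint16_t)(u16 >> SKETCH_CREFS_SHIFT);
    const uint16_t idx   = (uint16_t)(u16 & SKETCH_IDX_MASK);

    if (cRefs == 0)                                   /* first reference: track it directly. */
        return sketchPack(1, iShwPT);

    if (cRefs != SKETCH_CREFS_PHYSEXT)                /* second reference: switch to an extent. */
    {
        uint16_t iNew = *piExtFreeHead;
        if (iNew == SKETCH_NIL_INDEX)
            return sketchPack(SKETCH_CREFS_PHYSEXT, SKETCH_IDX_OVERFLOWED);
        *piExtFreeHead = paExts[iNew].iNext;
        paExts[iNew].iNext   = SKETCH_NIL_INDEX;
        paExts[iNew].aidx[0] = idx;                   /* the previously tracked PT */
        paExts[iNew].aidx[1] = iShwPT;                /* the new reference */
        paExts[iNew].aidx[2] = SKETCH_NIL_INDEX;
        return sketchPack(SKETCH_CREFS_PHYSEXT, iNew);
    }

    if (idx != SKETCH_IDX_OVERFLOWED)                 /* already a list: use the spare slot. */
    {
        if (paExts[idx].aidx[2] == SKETCH_NIL_INDEX)
        {
            paExts[idx].aidx[2] = iShwPT;
            return u16;
        }
        return sketchPack(SKETCH_CREFS_PHYSEXT, SKETCH_IDX_OVERFLOWED);   /* give up */
    }
    return u16;                                       /* overflowed earlier: nothing to do. */
}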
2519
2520
2521/**
2522 * Clear references to guest physical memory.
2523 *
2524 * @param pPool The pool.
2525 * @param pPage The page.
2526 * @param pHCPhys Pointer to the aHCPhys entry in the ram range.
2527 */
2528void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PRTHCPHYS pHCPhys)
2529{
2530 const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
2531 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2532
2533 uint16_t iPhysExt = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2534 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2535 {
2536 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2537 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2538 do
2539 {
2540 Assert(iPhysExt < pPool->cMaxPhysExts);
2541
2542 /*
2543 * Look for the shadow page and check if it's all freed.
2544 */
2545 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2546 {
2547 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2548 {
2549 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2550
2551 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2552 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2553 {
2554 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2555 return;
2556 }
2557
2558 /* we can free the node. */
2559 PVM pVM = pPool->CTXSUFF(pVM);
2560 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2561 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2562 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2563 {
2564 /* lonely node */
2565 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2566 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d lonely\n", *pHCPhys, pPage->idx));
2567 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2568 }
2569 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2570 {
2571 /* head */
2572 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d head\n", *pHCPhys, pPage->idx));
2573 *pHCPhys = (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK)
2574 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2575 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2576 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2577 }
2578 else
2579 {
2580 /* in list */
2581 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2582 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2583 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2584 }
2585 iPhysExt = iPhysExtNext;
2586 return;
2587 }
2588 }
2589
2590 /* next */
2591 iPhysExtPrev = iPhysExt;
2592 iPhysExt = paPhysExts[iPhysExt].iNext;
2593 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2594
2595 AssertFatalMsgFailed(("not-found! cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2596 }
2597 else /* nothing to do */
2598 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64\n", *pHCPhys));
2599}
2600
2601
2602
2603/**
2604 * Clear references to guest physical memory.
2605 *
2606 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2607 * is assumed to be correct, so the linear search can be skipped and we can assert
2608 * at an earlier point.
2609 *
2610 * @param pPool The pool.
2611 * @param pPage The page.
2612 * @param HCPhys The host physical address corresponding to the guest page.
2613 * @param GCPhys The guest physical address corresponding to HCPhys.
2614 */
2615static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2616{
2617 /*
2618 * Walk range list.
2619 */
2620 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2621 while (pRam)
2622 {
2623 RTGCPHYS off = GCPhys - pRam->GCPhys;
2624 if (off < pRam->cb)
2625 {
2626 /* does it match? */
2627 const unsigned iPage = off >> PAGE_SHIFT;
2628 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2629 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2630 {
2631 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2632 return;
2633 }
2634 break;
2635 }
2636 pRam = CTXSUFF(pRam->pNext);
2637 }
2638 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2639}
2640
2641
2642/**
2643 * Clear references to guest physical memory.
2644 *
2645 * @param pPool The pool.
2646 * @param pPage The page.
2647 * @param HCPhys The host physical address corresponding to the guest page.
2648 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2649 */
2650static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2651{
2652 /*
2653 * Walk range list.
2654 */
2655 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2656 while (pRam)
2657 {
2658 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2659 if (off < pRam->cb)
2660 {
2661 /* does it match? */
2662 const unsigned iPage = off >> PAGE_SHIFT;
2663 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2664 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2665 {
2666 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2667 return;
2668 }
2669 break;
2670 }
2671 pRam = CTXSUFF(pRam->pNext);
2672 }
2673
2674 /*
2675 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2676 */
2677 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2678 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2679 while (pRam)
2680 {
2681 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2682 while (iPage-- > 0)
2683 {
2684 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2685 {
2686 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2687 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2688 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2689 return;
2690 }
2691 }
2692 pRam = CTXSUFF(pRam->pNext);
2693 }
2694
2695 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2696}
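
/*
 * Editor's illustrative sketch (not part of the original file): the lookup
 * pattern used by pgmPoolTracDerefGCPhysHint() above. The hinted guest
 * address selects one RAM range and one page within it; only when that page
 * does not hold the expected host frame does the code fall back to a full
 * linear scan of every range. Types and names are hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

#define SKETCH_PAGE_SHIFT  12
#define SKETCH_FRAME_MASK  UINT64_C(0x000ffffffffff000)

typedef struct SKETCHRANGE
{
    uint64_t  GCPhysBase;   /* guest physical base of the range */
    uint64_t  cb;           /* size of the range in bytes */
    uint64_t *paHCPhys;     /* per-page host frame + tracking bits */
    struct SKETCHRANGE *pNext;
} SKETCHRANGE;

/* Returns the tracking slot holding HCPhys, trying the hinted guest address first. */
static uint64_t *sketchFindByHint(SKETCHRANGE *pHead, uint64_t GCPhysHint, uint64_t HCPhys)
{
    /* 1. Cheap path: the hint lands in some range and that page matches. */
    for (SKETCHRANGE *pRam = pHead; pRam; pRam = pRam->pNext)
    {
        uint64_t off = GCPhysHint - pRam->GCPhysBase;
        if (off < pRam->cb)
        {
            size_t iPage = (size_t)(off >> SKETCH_PAGE_SHIFT);
            if ((pRam->paHCPhys[iPage] & SKETCH_FRAME_MASK) == HCPhys)
                return &pRam->paHCPhys[iPage];
            break;                        /* hint was wrong; fall back below */
        }
    }

    /* 2. Expensive path: scan every page of every range. */
    for (SKETCHRANGE *pRam = pHead; pRam; pRam = pRam->pNext)
        for (size_t iPage = 0; iPage < (size_t)(pRam->cb >> SKETCH_PAGE_SHIFT); iPage++)
            if ((pRam->paHCPhys[iPage] & SKETCH_FRAME_MASK) == HCPhys)
                return &pRam->paHCPhys[iPage];
    return NULL;                          /* the real code asserts fatally here */
}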
2697
2698
2699/**
2700 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2701 *
2702 * @param pPool The pool.
2703 * @param pPage The page.
2704 * @param pShwPT The shadow page table (mapping of the page).
2705 * @param pGstPT The guest page table.
2706 */
2707DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2708{
2709 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2710 if (pShwPT->a[i].n.u1Present)
2711 {
2712 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2713 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2714 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2715 if (!--pPage->cPresent)
2716 break;
2717 }
2718}
2719
2720
2721/**
2722 * Clear references to guest physical memory in a PAE / 32-bit page table.
2723 *
2724 * @param pPool The pool.
2725 * @param pPage The page.
2726 * @param pShwPT The shadow page table (mapping of the page).
2727 * @param pGstPT The guest page table (just a half one).
2728 */
2729DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2730{
2731 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2732 if (pShwPT->a[i].n.u1Present)
2733 {
2734 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
2735 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2736 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2737 }
2738}
2739
2740
2741/**
2742 * Clear references to guest physical memory in a PAE / PAE page table.
2743 *
2744 * @param pPool The pool.
2745 * @param pPage The page.
2746 * @param pShwPT The shadow page table (mapping of the page).
2747 * @param pGstPT The guest page table.
2748 */
2749DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2750{
2751 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2752 if (pShwPT->a[i].n.u1Present)
2753 {
2754 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
2755 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2756 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2757 }
2758}
2759
2760
2761/**
2762 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2763 *
2764 * @param pPool The pool.
2765 * @param pPage The page.
2766 * @param pShwPT The shadow page table (mapping of the page).
2767 */
2768DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2769{
2770 RTGCPHYS GCPhys = pPage->GCPhys;
2771 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2772 if (pShwPT->a[i].n.u1Present)
2773 {
2774 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2775 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2776 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2777 }
2778}
2779
2780
2781/**
2782 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2783 *
2784 * @param pPool The pool.
2785 * @param pPage The page.
2786 * @param pShwPT The shadow page table (mapping of the page).
2787 */
2788DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2789{
2790 RTGCPHYS GCPhys = pPage->GCPhys;
2791 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2792 if (pShwPT->a[i].n.u1Present)
2793 {
2794 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
2795 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2796 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2797 }
2798}
2799#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2800
2801/**
2802 * Clear references to shadowed pages in a PAE page directory.
2803 *
2804 * @param pPool The pool.
2805 * @param pPage The page.
2806 * @param pShwPD The shadow page directory (mapping of the page).
2807 */
2808DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2809{
2810 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2811 {
2812 if (pShwPD->a[i].n.u1Present)
2813 {
2814 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2815 if (pSubPage)
2816 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2817 else
2818 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2819 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2820 }
2821 }
2822}
2823
2824
2825/**
2826 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2827 *
2828 * @param pPool The pool.
2829 * @param pPage The page.
2830 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2831 */
2832DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2833{
2834 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2835 {
2836 if (pShwPdPtr->a[i].n.u1Present)
2837 {
2838 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2839 if (pSubPage)
2840 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2841 else
2842 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2843 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2844 }
2845 }
2846}
2847
2848
2849/**
2850 * Clears all references made by this page.
2851 *
2852 * This includes other shadow pages and GC physical addresses.
2853 *
2854 * @param pPool The pool.
2855 * @param pPage The page.
2856 */
2857static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2858{
2859 /*
2860 * Map the shadow page and take action according to the page kind.
2861 */
2862 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2863 switch (pPage->enmKind)
2864 {
2865#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2867 {
2868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2869 void *pvGst;
2870 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2871 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2872 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2873 break;
2874 }
2875
2876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2877 {
2878 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2879 void *pvGst;
2880 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2881 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2882 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2883 break;
2884 }
2885
2886 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2887 {
2888 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2889 void *pvGst;
2890 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2891 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2892 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2893 break;
2894 }
2895
2896 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2897 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2898 {
2899 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2900 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2901 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2902 break;
2903 }
2904
2905 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2906 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2907 {
2908 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2909 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2910 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2911 break;
2912 }
2913
2914#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2915 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2916 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2917 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2919 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2921 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2922 break;
2923#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2924
2925 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2926 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2927 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2928 break;
2929
2930 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2931 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2932 break;
2933
2934 default:
2935 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2936 }
2937
2938 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
2939 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2940 ASMMemZeroPage(pvShw);
2941 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2942 pPage->fZeroed = true;
2943}
2944#endif /* PGMPOOL_WITH_USER_TRACKING */
2945
2946
2947/**
2948 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2949 *
2950 * @param pPool The pool.
2951 */
2952static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2953{
2954 /*
2955 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2956 */
2957 Assert(NIL_PGMPOOL_IDX == 0);
2958 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2959 {
2960 /*
2961 * Get the page address.
2962 */
2963 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2964 union
2965 {
2966 uint64_t *pau64;
2967 uint32_t *pau32;
2968 } u;
2969 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2970
2971 /*
2972 * Mark stuff not present.
2973 */
2974 switch (pPage->enmKind)
2975 {
2976 case PGMPOOLKIND_ROOT_32BIT_PD:
2977 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2978 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2979 u.pau32[iPage] = 0;
2980 break;
2981
2982 case PGMPOOLKIND_ROOT_PAE_PD:
2983 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2984 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2985 u.pau64[iPage] = 0;
2986 break;
2987
2988 case PGMPOOLKIND_ROOT_PML4:
2989 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2990 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2991 u.pau64[iPage] = 0;
2992 break;
2993
2994 case PGMPOOLKIND_ROOT_PDPTR:
2995 /* Not root of shadowed pages currently, ignore it. */
2996 break;
2997 }
2998 }
2999
3000 /*
3001 * Paranoia (to be removed), flag a global CR3 sync.
3002 */
3003 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3004}
3005
3006
3007/**
3008 * Flushes the entire cache.
3009 *
3010 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3011 * and will execute this CR3 flush.
3012 *
3013 * @param pPool The pool.
3014 */
3015static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3016{
3017 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3018 LogFlow(("pgmPoolFlushAllInt:\n"));
3019
3020 /*
3021 * If there are no pages in the pool, there is nothing to do.
3022 */
3023 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3024 {
3025 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3026 return;
3027 }
3028
3029 /*
3030 * Nuke the free list and reinsert all pages into it.
3031 */
3032 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3033 {
3034 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3035
3036#ifdef IN_RING3
3037 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3038#endif
3039#ifdef PGMPOOL_WITH_MONITORING
3040 if (pPage->fMonitored)
3041 pgmPoolMonitorFlush(pPool, pPage);
3042 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3043 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3044 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3045 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3046 pPage->cModifications = 0;
3047#endif
3048 pPage->GCPhys = NIL_RTGCPHYS;
3049 pPage->enmKind = PGMPOOLKIND_FREE;
3050 Assert(pPage->idx == i);
3051 pPage->iNext = i + 1;
3052 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3053 pPage->fSeenNonGlobal = false;
3054 pPage->fMonitored = false;
3055 pPage->fCached = false;
3056 pPage->fReusedFlushPending = false;
3057 pPage->fCR3Mix = false;
3058#ifdef PGMPOOL_WITH_USER_TRACKING
3059 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3060#endif
3061#ifdef PGMPOOL_WITH_CACHE
3062 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3063 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3064#endif
3065 }
3066 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3067 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3068 pPool->cUsedPages = 0;
3069
3070#ifdef PGMPOOL_WITH_USER_TRACKING
3071 /*
3072 * Zap and reinitialize the user records.
3073 */
3074 pPool->cPresent = 0;
3075 pPool->iUserFreeHead = 0;
3076 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3077 const unsigned cMaxUsers = pPool->cMaxUsers;
3078 for (unsigned i = 0; i < cMaxUsers; i++)
3079 {
3080 paUsers[i].iNext = i + 1;
3081 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3082 paUsers[i].iUserTable = 0xfffe;
3083 }
3084 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3085#endif
3086
3087#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3088 /*
3089 * Clear all the GCPhys links and rebuild the phys ext free list.
3090 */
3091 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3092 pRam;
3093 pRam = pRam->CTXSUFF(pNext))
3094 {
3095 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3096 while (iPage-- > 0)
3097 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
3098 }
3099
3100 pPool->iPhysExtFreeHead = 0;
3101 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3102 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3103 for (unsigned i = 0; i < cMaxPhysExts; i++)
3104 {
3105 paPhysExts[i].iNext = i + 1;
3106 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3107 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3108 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3109 }
3110 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3111#endif
3112
3113#ifdef PGMPOOL_WITH_MONITORING
3114 /*
3115 * Just zap the modified list.
3116 */
3117 pPool->cModifiedPages = 0;
3118 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3119#endif
3120
3121#ifdef PGMPOOL_WITH_CACHE
3122 /*
3123 * Clear the GCPhys hash and the age list.
3124 */
3125 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3126 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3127 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3128 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3129#endif
3130
3131 /*
3132 * Flush all the special root pages.
3133 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3134 */
3135 pgmPoolFlushAllSpecialRoots(pPool);
3136 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3137 {
3138 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3139 pPage->iNext = NIL_PGMPOOL_IDX;
3140#ifdef PGMPOOL_WITH_MONITORING
3141 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3142 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3143 pPage->cModifications = 0;
3144 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3145 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3146 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3147 if (pPage->fMonitored)
3148 {
3149 PVM pVM = pPool->CTXSUFF(pVM);
3150 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3151 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
3152 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
3153 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
3154 pPool->pszAccessHandler);
3155 AssertFatalRCSuccess(rc);
3156# ifdef PGMPOOL_WITH_CACHE
3157 pgmPoolHashInsert(pPool, pPage);
3158# endif
3159 }
3160#endif
3161#ifdef PGMPOOL_WITH_USER_TRACKING
3162 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3163#endif
3164#ifdef PGMPOOL_WITH_CACHE
3165 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3166 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3167#endif
3168 }
3169
3170 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3171}
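
/*
 * Editor's illustrative sketch (not part of the original file): the
 * reinitialisation idiom used repeatedly by pgmPoolFlushAllInt() above for
 * the page list, the user records and the phys-ext records. Every array slot
 * is scrubbed and chained to its successor, the last slot terminates the
 * chain, and the free head is reset to the first slot (the real code starts
 * the page list at PGMPOOL_IDX_FIRST). Names are hypothetical; cNodes must be
 * at least 1.
 */
#include <stdint.h>
#include <stddef.h>

#define SKETCH_NIL_INDEX UINT16_MAX

typedef struct SKETCHNODE
{
    uint16_t iNext;
    uint16_t uPayload;
} SKETCHNODE;

static void sketchRebuildFreeList(SKETCHNODE *paNodes, size_t cNodes, uint16_t *piFreeHead)
{
    for (size_t i = 0; i < cNodes; i++)
    {
        paNodes[i].uPayload = 0;                      /* scrub any stale content */
        paNodes[i].iNext    = (uint16_t)(i + 1);      /* chain to the next slot */
    }
    paNodes[cNodes - 1].iNext = SKETCH_NIL_INDEX;     /* last slot terminates the chain */
    *piFreeHead = 0;
}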
3172
3173
3174/**
3175 * Flushes a pool page.
3176 *
3177 * This moves the page to the free list after removing all user references to it.
3178 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3179 *
3180 * @returns VBox status code.
3181 * @retval VINF_SUCCESS on success.
3182 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3183 * @param pPool The pool.
3184 * @param pPage The shadow page.
3185 */
3186int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3187{
3188 int rc = VINF_SUCCESS;
3189 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3190 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3191 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3192
3193 /*
3194 * Quietly reject any attempts at flushing any of the special root pages.
3195 */
3196 if (pPage->idx < PGMPOOL_IDX_FIRST)
3197 {
3198 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3199 return VINF_SUCCESS;
3200 }
3201
3202 /*
3203 * Mark the page as being in need of an ASMMemZeroPage().
3204 */
3205 pPage->fZeroed = false;
3206
3207#ifdef PGMPOOL_WITH_USER_TRACKING
3208 /*
3209 * Clear the page.
3210 */
3211 pgmPoolTrackClearPageUsers(pPool, pPage);
3212 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3213 pgmPoolTrackDeref(pPool, pPage);
3214 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3215#endif
3216
3217#ifdef PGMPOOL_WITH_CACHE
3218 /*
3219 * Flush it from the cache.
3220 */
3221 pgmPoolCacheFlushPage(pPool, pPage);
3222#endif /* PGMPOOL_WITH_CACHE */
3223
3224#ifdef PGMPOOL_WITH_MONITORING
3225 /*
3226 * Deregister the monitoring.
3227 */
3228 if (pPage->fMonitored)
3229 rc = pgmPoolMonitorFlush(pPool, pPage);
3230#endif
3231
3232 /*
3233 * Free the page.
3234 */
3235 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3236 pPage->iNext = pPool->iFreeHead;
3237 pPool->iFreeHead = pPage->idx;
3238 pPage->enmKind = PGMPOOLKIND_FREE;
3239 pPage->GCPhys = NIL_RTGCPHYS;
3240 pPage->fReusedFlushPending = false;
3241
3242 pPool->cUsedPages--;
3243 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3244 return rc;
3245}
3246
3247
3248/**
3249 * Frees a usage of a pool page.
3250 *
3251 * The caller is responsible for updating the user table so that it no longer
3252 * references the shadow page.
3253 *
3254 * @param pPool The pool.
3255 * @param pPage The shadow page.
3256 * @param iUser The shadow page pool index of the user table.
3257 * @param iUserTable The index into the user table (shadowed).
3258 */
3259void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3260{
3261 STAM_PROFILE_START(&pPool->StatFree, a);
3262 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3263 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3264 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3265#ifdef PGMPOOL_WITH_USER_TRACKING
3266 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3267#endif
3268#ifdef PGMPOOL_WITH_CACHE
3269 if (!pPage->fCached)
3270#endif
3271 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3272 STAM_PROFILE_STOP(&pPool->StatFree, a);
3273}
3274
3275
3276/**
3277 * Makes one or more free pages available.
3278 *
3279 * @returns VBox status code.
3280 * @retval VINF_SUCCESS on success.
3281 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3282 *
3283 * @param pPool The pool.
3284 * @param iUser The user of the page.
3285 */
3286static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3287{
3288 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3289
3290 /*
3291 * If the pool isn't fully grown yet, expand it.
3292 */
3293 if (pPool->cCurPages < pPool->cMaxPages)
3294 {
3295 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3296#ifdef IN_RING3
3297 int rc = PGMR3PoolGrow(pPool->pVMHC);
3298#else
3299 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3300#endif
3301 if (VBOX_FAILURE(rc))
3302 return rc;
3303 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3304 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3305 return VINF_SUCCESS;
3306 }
3307
3308#ifdef PGMPOOL_WITH_CACHE
3309 /*
3310 * Free one cached page.
3311 */
3312 return pgmPoolCacheFreeOne(pPool, iUser);
3313#else
3314 /*
3315 * Flush the pool.
3316 * If we have tracking enabled, it should be possible to come up with
3317 * a cheap replacement strategy...
3318 */
3319 pgmPoolFlushAllInt(pPool);
3320 return VERR_PGM_POOL_FLUSHED;
3321#endif
3322}
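
/*
 * Editor's illustrative sketch (not part of the original file): the
 * replenishment policy of pgmPoolMakeMoreFreePages() above, reduced to a
 * standalone model: grow the pool while it is below its configured maximum,
 * and only once it is fully grown start evicting cached (reusable) entries.
 * All names are hypothetical.
 */
#include <stdbool.h>
#include <stddef.h>

typedef struct SKETCHPOOL
{
    size_t cCurPages;    /* pages currently backing the pool */
    size_t cMaxPages;    /* configured upper limit */
    size_t cFreePages;   /* pages on the free list */
    size_t cCachedPages; /* pages that could be evicted and reused */
} SKETCHPOOL;

/* Returns true if at least one free page is (now) available. */
static bool sketchMakeMoreFreePages(SKETCHPOOL *pPool)
{
    if (pPool->cFreePages > 0)
        return true;

    if (pPool->cCurPages < pPool->cMaxPages)   /* not fully grown: just grow. */
    {
        pPool->cCurPages++;
        pPool->cFreePages++;
        return true;
    }

    if (pPool->cCachedPages > 0)               /* fully grown: evict a cached page. */
    {
        pPool->cCachedPages--;
        pPool->cFreePages++;
        return true;
    }
    return false;   /* nothing reusable; the non-cache build flushes the whole pool instead. */
}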
3323
3324
3325/**
3326 * Allocates a page from the pool.
3327 *
3328 * This page may actually be a cached page and not in need of any processing
3329 * on the caller's part.
3330 *
3331 * @returns VBox status code.
3332 * @retval VINF_SUCCESS if a NEW page was allocated.
3333 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3334 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3335 * @param pVM The VM handle.
3336 * @param GCPhys The GC physical address of the page we're gonna shadow.
3337 * For 4MB and 2MB PD entries, it's the first address the
3338 * shadow PT is covering.
3339 * @param enmKind The kind of mapping.
3340 * @param iUser The shadow page pool index of the user table.
3341 * @param iUserTable The index into the user table (shadowed).
3342 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3343 */
3344int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3345{
3346 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3347 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3348 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3349
3350 *ppPage = NULL;
3351
3352#ifdef PGMPOOL_WITH_CACHE
3353 if (pPool->fCacheEnabled)
3354 {
3355 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3356 if (VBOX_SUCCESS(rc2))
3357 {
3358 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3359 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3360 return rc2;
3361 }
3362 }
3363#endif
3364
3365 /*
3366 * Allocate a new one.
3367 */
3368 int rc = VINF_SUCCESS;
3369 uint16_t iNew = pPool->iFreeHead;
3370 if (iNew == NIL_PGMPOOL_IDX)
3371 {
3372 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3373 if (VBOX_FAILURE(rc))
3374 {
3375 if (rc != VERR_PGM_POOL_CLEARED)
3376 {
3377 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3378 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3379 return rc;
3380 }
3381 rc = VERR_PGM_POOL_FLUSHED;
3382 }
3383 iNew = pPool->iFreeHead;
3384 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3385 }
3386
3387 /* unlink the free head */
3388 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3389 pPool->iFreeHead = pPage->iNext;
3390 pPage->iNext = NIL_PGMPOOL_IDX;
3391
3392 /*
3393 * Initialize it.
3394 */
3395 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3396 pPage->enmKind = enmKind;
3397 pPage->GCPhys = GCPhys;
3398 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3399 pPage->fMonitored = false;
3400 pPage->fCached = false;
3401 pPage->fReusedFlushPending = false;
3402 pPage->fCR3Mix = false;
3403#ifdef PGMPOOL_WITH_MONITORING
3404 pPage->cModifications = 0;
3405 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3406 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3407#endif
3408#ifdef PGMPOOL_WITH_USER_TRACKING
3409 pPage->cPresent = 0;
3410 pPage->iFirstPresent = ~0;
3411
3412 /*
3413 * Insert into the tracking and cache. If this fails, free the page.
3414 */
3415 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3416 if (VBOX_FAILURE(rc3))
3417 {
3418 if (rc3 != VERR_PGM_POOL_CLEARED)
3419 {
3420 pPool->cUsedPages--;
3421 pPage->enmKind = PGMPOOLKIND_FREE;
3422 pPage->GCPhys = NIL_RTGCPHYS;
3423 pPage->iNext = pPool->iFreeHead;
3424 pPool->iFreeHead = pPage->idx;
3425 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3426 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3427 return rc3;
3428 }
3429 rc = VERR_PGM_POOL_FLUSHED;
3430 }
3431#endif /* PGMPOOL_WITH_USER_TRACKING */
3432
3433 /*
3434 * Commit the allocation, clear the page and return.
3435 */
3436#ifdef VBOX_WITH_STATISTICS
3437 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3438 pPool->cUsedPagesHigh = pPool->cUsedPages;
3439#endif
3440
3441 if (!pPage->fZeroed)
3442 {
3443 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3444 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3445 ASMMemZeroPage(pv);
3446 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3447 }
3448
3449 *ppPage = pPage;
3450 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3451 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3452 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3453 return rc;
3454}
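
/*
 * Editor's illustrative sketch (not part of the original file): a standalone
 * model of the non-cached allocation path in pgmPoolAlloc() above. The
 * free-list head is popped (the real code first asks the pool to make more
 * free pages when the list is empty), the page is prepared for its new guest
 * page, and it is only zeroed when the previous owner left it dirty. Names
 * are hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#define SKETCH_NIL_INDEX  UINT16_MAX
#define SKETCH_PAGE_SIZE  4096

typedef struct SKETCHPOOLPAGE
{
    uint16_t iNext;                      /* free-list link */
    bool     fZeroed;                    /* content is known to be all zero */
    uint64_t GCPhys;                     /* guest page this shadow page covers */
    uint8_t  abData[SKETCH_PAGE_SIZE];   /* the shadow table itself */
} SKETCHPOOLPAGE;

/* Pops a page off the free list and prepares it for GCPhys; returns NULL when empty. */
static SKETCHPOOLPAGE *sketchPoolAllocPage(SKETCHPOOLPAGE *paPages, uint16_t *piFreeHead, uint64_t GCPhys)
{
    uint16_t iNew = *piFreeHead;
    if (iNew == SKETCH_NIL_INDEX)
        return NULL;                     /* the real code grows the pool or evicts first */

    SKETCHPOOLPAGE *pPage = &paPages[iNew];
    *piFreeHead   = pPage->iNext;
    pPage->iNext  = SKETCH_NIL_INDEX;
    pPage->GCPhys = GCPhys;

    if (!pPage->fZeroed)                 /* zero lazily: only if the page is dirty */
    {
        memset(pPage->abData, 0, sizeof(pPage->abData));
        pPage->fZeroed = true;
    }
    return pPage;
}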
3455
3456
3457/**
3458 * Frees a usage of a pool page.
3459 *
3460 * @param pVM The VM handle.
3461 * @param HCPhys The HC physical address of the shadow page.
3462 * @param iUser The shadow page pool index of the user table.
3463 * @param iUserTable The index into the user table (shadowed).
3464 */
3465void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3466{
3467 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3468 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3469 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3470}
3471
3472
3473/**
3474 * Gets an in-use page in the pool by its physical address.
3475 *
3476 * @returns Pointer to the page.
3477 * @param pVM The VM handle.
3478 * @param HCPhys The HC physical address of the shadow page.
3479 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3480 */
3481PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3482{
3483 /** @todo profile this! */
3484 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3485 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3486 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3487 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3488 return pPage;
3489}
3490
3491
3492/**
3493 * Flushes the entire cache.
3494 *
3495 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3496 * and will execute this CR3 flush.
3497 *
3498 * @param pPool The pool.
3499 */
3500void pgmPoolFlushAll(PVM pVM)
3501{
3502 LogFlow(("pgmPoolFlushAll:\n"));
3503 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3504}
3505