VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 4620

Last change on this file since 4620 was 4620, checked in by vboxsync, 17 years ago

The initial PGMRAMRANGE::aHCPhys -> PGMRAMRANGE::aPages (PGMPAGE) conversion.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 119.9 KB
Line 
1/* $Id: PGMAllPool.cpp 4620 2007-09-08 00:39:30Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_GC
28# include <VBox/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vm.h>
32#include <VBox/disopcode.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37
38
39/*******************************************************************************
40* Internal Functions *
41*******************************************************************************/
42__BEGIN_DECLS
43static void pgmPoolFlushAllInt(PPGMPOOL pPool);
44#ifdef PGMPOOL_WITH_USER_TRACKING
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
48#endif
49#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
50static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
51#endif
52#ifdef PGMPOOL_WITH_CACHE
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
54#endif
55#ifdef PGMPOOL_WITH_MONITORING
56static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
57#endif
58#ifndef IN_RING3
59DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
60#endif
61__END_DECLS
62
63
64/**
65 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
66 *
67 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
68 * @param enmKind The page kind.
69 */
70DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
71{
72 switch (enmKind)
73 {
74 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
75 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
76 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
77 return true;
78 default:
79 return false;
80 }
81}
82
83
84#ifdef IN_GC
85/**
86 * Maps a pool page into the current context.
87 *
88 * @returns Pointer to the mapping.
89 * @param pVM The VM handle.
90 * @param pPage The page to map.
91 */
92void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
93{
94 /* general pages. */
95 if (pPage->idx >= PGMPOOL_IDX_FIRST)
96 {
97 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
98 void *pv;
99 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
100 AssertReleaseRC(rc);
101 return pv;
102 }
103
104 /* special pages. */
105 switch (pPage->idx)
106 {
107 case PGMPOOL_IDX_PD:
108 return pVM->pgm.s.pGC32BitPD;
109 case PGMPOOL_IDX_PAE_PD:
110 return pVM->pgm.s.apGCPaePDs[0];
111 case PGMPOOL_IDX_PDPTR:
112 return pVM->pgm.s.pGCPaePDPTR;
113 case PGMPOOL_IDX_PML4:
114 return pVM->pgm.s.pGCPaePML4;
115 default:
116 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
117 return NULL;
118 }
119}
120#endif /* IN_GC */
121
122
123#ifdef PGMPOOL_WITH_MONITORING
124/**
125 * Determin the size of a write instruction.
126 * @returns number of bytes written.
127 * @param pDis The disassembler state.
128 */
129static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
130{
131 /*
132 * This is very crude and possibly wrong for some opcodes,
133 * but since it's not really supposed to be called we can
134 * probably live with that.
135 */
136 return DISGetParamSize(pDis, &pDis->param1);
137}
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 /*
150 * Find the list head.
151 */
152 uint16_t idx = pPage->idx;
153 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
154 {
155 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 idx = pPage->iMonitoredPrev;
158 Assert(idx != pPage->idx);
159 pPage = &pPool->aPages[idx];
160 }
161 }
162
163 /*
164 * Itereate the list flushing each shadow page.
165 */
166 int rc = VINF_SUCCESS;
167 for (;;)
168 {
169 idx = pPage->iMonitoredNext;
170 Assert(idx != pPage->idx);
171 if (pPage->idx >= PGMPOOL_IDX_FIRST)
172 {
173 int rc2 = pgmPoolFlushPage(pPool, pPage);
174 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
175 rc = VINF_PGM_SYNC_CR3;
176 }
177 /* next */
178 if (idx == NIL_PGMPOOL_IDX)
179 break;
180 pPage = &pPool->aPages[idx];
181 }
182 return rc;
183}
184
185
186/**
187 * Wrapper for getting the current context pointer to the entry being modified.
188 *
189 * @returns Pointer to the current context mapping of the entry.
190 * @param pPool The pool.
191 * @param pvFault The fault virtual address.
192 * @param GCPhysFault The fault physical address.
193 * @param cbEntry The entry size.
194 */
195#ifdef IN_RING3
196DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
197#else
198DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
199#endif
200{
201#ifdef IN_GC
202 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
203
204#elif defined(IN_RING0)
205 void *pvRet;
206 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
207 AssertFatalRCSuccess(rc);
208 return pvRet;
209
210#elif defined(IN_RING3)
211 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
212#else
213# error "huh?"
214#endif
215}
216
217
218/**
219 * Process shadow entries before they are changed by the guest.
220 *
221 * For PT entries we will clear them. For PD entries, we'll simply check
222 * for mapping conflicts and set the SyncCR3 FF if found.
223 *
224 * @param pPool The pool.
225 * @param pPage The head page.
226 * @param GCPhysFault The guest physical fault address.
227 * @param uAddress In R0 and GC this is the guest context fault address (flat).
228 * In R3 this is the host context 'fault' address.
229 * @param pCpu The disassembler state for figuring out the write size.
230 * This need not be specified if the caller knows we won't do cross entry accesses.
231 */
232#ifdef IN_RING3
233void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
234#else
235void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
236#endif
237{
238 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
239 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
240 for (;;)
241 {
242 union
243 {
244 void *pv;
245 PX86PT pPT;
246 PX86PTPAE pPTPae;
247 PX86PD pPD;
248 PX86PDPAE pPDPae;
249 } uShw;
250 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
251
252 switch (pPage->enmKind)
253 {
254 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
255 {
256 const unsigned iShw = off / sizeof(X86PTE);
257 if (uShw.pPT->a[iShw].n.u1Present)
258 {
259# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
260 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
261 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
262 pgmPoolTracDerefGCPhysHint(pPool, pPage,
263 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
264 pGstPte->u & X86_PTE_PG_MASK);
265# endif
266 uShw.pPT->a[iShw].u = 0;
267 }
268 break;
269 }
270
271 /* page/2 sized */
272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
273 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
274 {
275 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
279 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
280 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
281 pgmPoolTracDerefGCPhysHint(pPool, pPage,
282 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
283 pGstPte->u & X86_PTE_PG_MASK);
284# endif
285 uShw.pPTPae->a[iShw].u = 0;
286 }
287 }
288 break;
289
290 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
291 {
292 const unsigned iShw = off / sizeof(X86PTPAE);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PAE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 break;
305 }
306
307 case PGMPOOLKIND_ROOT_32BIT_PD:
308 {
309 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
310 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
311 {
312 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
313 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
315 }
316 /* paranoia / a bit assumptive. */
317 else if ( pCpu
318 && (off & 4)
319 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
320 {
321 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
322 if ( iShw2 != iShw
323 && iShw2 < ELEMENTS(uShw.pPD->a)
324 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
325 {
326 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
327 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
328 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
329 }
330 }
331#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
332 if ( uShw.pPD->a[iShw].n.u1Present
333 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
334 {
335 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
336# ifdef IN_GC /* TLB load - we're pushing things a bit... */
337 ASMProbeReadByte(pvAddress);
338# endif
339 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
340 uShw.pPD->a[iShw].u = 0;
341 }
342#endif
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_PAE_PD:
347 {
348 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
349 for (unsigned i = 0; i < 2; i++, iShw++)
350 {
351 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
352 {
353 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
354 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
355 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
356 }
357 /* paranoia / a bit assumptive. */
358 else if ( pCpu
359 && (off & 4)
360 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
361 {
362 const unsigned iShw2 = iShw + 2;
363 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
364 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
367 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
369 }
370 }
371#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
372 if ( uShw.pPDPae->a[iShw].n.u1Present
373 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
376# ifdef IN_GC /* TLB load - we're pushing things a bit... */
377 ASMProbeReadByte(pvAddress);
378# endif
379 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
380 uShw.pPDPae->a[iShw].u = 0;
381 }
382#endif
383 }
384 break;
385 }
386
387 default:
388 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
389 }
390
391 /* next */
392 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
393 return;
394 pPage = &pPool->aPages[pPage->iMonitoredNext];
395 }
396}
397
398
399# ifndef IN_RING3
400/**
401 * Checks if a access could be a fork operation in progress.
402 *
403 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
404 *
405 * @returns true if it's likly that we're forking, otherwise false.
406 * @param pPool The pool.
407 * @param pCpu The disassembled instruction.
408 * @param offFault The access offset.
409 */
410DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
411{
412 /*
413 * i386 linux is using btr to clear X86_PTE_RW.
414 * The functions involved are (2.6.16 source inspection):
415 * clear_bit
416 * ptep_set_wrprotect
417 * copy_one_pte
418 * copy_pte_range
419 * copy_pmd_range
420 * copy_pud_range
421 * copy_page_range
422 * dup_mmap
423 * dup_mm
424 * copy_mm
425 * copy_process
426 * do_fork
427 */
428 if ( pCpu->pCurInstr->opcode == OP_BTR
429 && !(offFault & 4)
430 /** @todo Validate that the bit index is X86_PTE_RW. */
431 )
432 {
433 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
434 return true;
435 }
436 return false;
437}
438
439
440/**
441 * Determin whether the page is likely to have been reused.
442 *
443 * @returns true if we consider the page as being reused for a different purpose.
444 * @returns false if we consider it to still be a paging page.
445 * @param pPage The page in question.
446 * @param pCpu The disassembly info for the faulting insturction.
447 * @param pvFault The fault address.
448 *
449 * @remark The REP prefix check is left to the caller because of STOSD/W.
450 */
451DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
452{
453 switch (pCpu->pCurInstr->opcode)
454 {
455 case OP_PUSH:
456 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
457 return true;
458 case OP_PUSHF:
459 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
460 return true;
461 case OP_PUSHA:
462 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
463 return true;
464 case OP_FXSAVE:
465 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
466 return true;
467 }
468 if ( (pCpu->param1.flags & USE_REG_GEN32)
469 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
470 {
471 Log4(("pgmPoolMonitorIsReused: ESP\n"));
472 return true;
473 }
474
475 //if (pPage->fCR3Mix)
476 // return false;
477 return false;
478}
479
480
481/**
482 * Flushes the page being accessed.
483 *
484 * @returns VBox status code suitable for scheduling.
485 * @param pVM The VM handle.
486 * @param pPool The pool.
487 * @param pPage The pool page (head).
488 * @param pCpu The disassembly of the write instruction.
489 * @param pRegFrame The trap register frame.
490 * @param GCPhysFault The fault address as guest physical address.
491 * @param pvFault The fault address.
492 */
493static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
494 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
495{
496 /*
497 * First, do the flushing.
498 */
499 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
500
501 /*
502 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
503 */
504 uint32_t cbWritten;
505 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
506 if (VBOX_SUCCESS(rc2))
507 pRegFrame->eip += pCpu->opsize;
508 else if (rc2 == VERR_EM_INTERPRETER)
509 {
510#ifdef IN_GC
511 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
512 {
513 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
514 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
515 rc = VINF_SUCCESS;
516 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
517 }
518 else
519#endif
520 {
521 rc = VINF_EM_RAW_EMULATE_INSTR;
522 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
523 }
524 }
525 else
526 rc = rc2;
527
528 /* See use in pgmPoolAccessHandlerSimple(). */
529 PGM_INVL_GUEST_TLBS();
530
531 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
532 return rc;
533
534}
535
536
537/**
538 * Handles the STOSD write accesses.
539 *
540 * @returns VBox status code suitable for scheduling.
541 * @param pVM The VM handle.
542 * @param pPool The pool.
543 * @param pPage The pool page (head).
544 * @param pCpu The disassembly of the write instruction.
545 * @param pRegFrame The trap register frame.
546 * @param GCPhysFault The fault address as guest physical address.
547 * @param pvFault The fault address.
548 */
549DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
550 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
551{
552 /*
553 * Increment the modification counter and insert it into the list
554 * of modified pages the first time.
555 */
556 if (!pPage->cModifications++)
557 pgmPoolMonitorModifiedInsert(pPool, pPage);
558
559 /*
560 * Execute REP STOSD.
561 *
562 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
563 * write situation, meaning that it's safe to write here.
564 */
565#ifdef IN_GC
566 uint32_t *pu32 = (uint32_t *)pvFault;
567#else
568 RTGCPTR pu32 = pvFault;
569#endif
570 while (pRegFrame->ecx)
571 {
572 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
573#ifdef IN_GC
574 *pu32++ = pRegFrame->eax;
575#else
576 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
577 pu32 += 4;
578#endif
579 GCPhysFault += 4;
580 pRegFrame->edi += 4;
581 pRegFrame->ecx--;
582 }
583 pRegFrame->eip += pCpu->opsize;
584
585 /* See use in pgmPoolAccessHandlerSimple(). */
586 PGM_INVL_GUEST_TLBS();
587
588 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
589 return VINF_SUCCESS;
590}
591
592
593/**
594 * Handles the simple write accesses.
595 *
596 * @returns VBox status code suitable for scheduling.
597 * @param pVM The VM handle.
598 * @param pPool The pool.
599 * @param pPage The pool page (head).
600 * @param pCpu The disassembly of the write instruction.
601 * @param pRegFrame The trap register frame.
602 * @param GCPhysFault The fault address as guest physical address.
603 * @param pvFault The fault address.
604 */
605DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
606 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
607{
608 /*
609 * Increment the modification counter and insert it into the list
610 * of modified pages the first time.
611 */
612 if (!pPage->cModifications++)
613 pgmPoolMonitorModifiedInsert(pPool, pPage);
614
615 /*
616 * Clear all the pages. ASSUMES that pvFault is readable.
617 */
618 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
619
620 /*
621 * Interpret the instruction.
622 */
623 uint32_t cb;
624 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
625 if (VBOX_SUCCESS(rc))
626 pRegFrame->eip += pCpu->opsize;
627 else if (rc == VERR_EM_INTERPRETER)
628 {
629# ifdef IN_GC
630 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
631 {
632 /* We're not able to handle this in ring-3, so fix the interpreter! */
633 /** @note Should be fine. There's no need to flush the whole thing. */
634#ifndef DEBUG_sandervl
635 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
636 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
637#endif
638 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
639 rc = pgmPoolMonitorChainFlush(pPool, pPage);
640 }
641 else
642# endif
643 {
644 rc = VINF_EM_RAW_EMULATE_INSTR;
645 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
646 }
647 }
648
649 /*
650 * Quick hack, with logging enabled we're getting stale
651 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
652 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
653 * have to be fixed to support this. But that'll have to wait till next week.
654 *
655 * An alternative is to keep track of the changed PTEs together with the
656 * GCPhys from the guest PT. This may proove expensive though.
657 *
658 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
659 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
660 */
661 PGM_INVL_GUEST_TLBS();
662
663 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
664 return rc;
665}
666
667
668/**
669 * \#PF Handler callback for PT write accesses.
670 *
671 * @returns VBox status code (appropriate for GC return).
672 * @param pVM VM Handle.
673 * @param uErrorCode CPU Error code.
674 * @param pRegFrame Trap register frame.
675 * NULL on DMA and other non CPU access.
676 * @param pvFault The fault address (cr2).
677 * @param GCPhysFault The GC physical address corresponding to pvFault.
678 * @param pvUser User argument.
679 */
680DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
681{
682 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
683 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
684 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
685 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
686
687 /*
688 * We should ALWAYS have the list head as user parameter. This
689 * is because we use that page to record the changes.
690 */
691 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
692
693 /*
694 * Disassemble the faulting instruction.
695 */
696 DISCPUSTATE Cpu;
697 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
698 AssertRCReturn(rc, rc);
699
700 /*
701 * Check if it's worth dealing with.
702 */
703 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
704 || pPage->fCR3Mix)
705 && !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
706 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
707 {
708 /*
709 * Simple instructions, no REP prefix.
710 */
711 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
712 {
713 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
714 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
715 return rc;
716 }
717
718 /*
719 * Windows is frequently doing small memset() operations (netio test 4k+).
720 * We have to deal with these or we'll kill the cache and performance.
721 */
722 if ( Cpu.pCurInstr->opcode == OP_STOSWD
723 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
724 && pRegFrame->ecx <= 0x20
725 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
726 && !((uintptr_t)pvFault & 3)
727 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
728 && Cpu.mode == CPUMODE_32BIT
729 && Cpu.opmode == CPUMODE_32BIT
730 && Cpu.addrmode == CPUMODE_32BIT
731 && Cpu.prefix == PREFIX_REP
732 && !pRegFrame->eflags.Bits.u1DF
733 )
734 {
735 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
736 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
737 return rc;
738 }
739
740 /* REP prefix, don't bother. */
741 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
742 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
743 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
744 }
745
746 /*
747 * Not worth it, so flush it.
748 */
749 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
750 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
751 return rc;
752}
753
754# endif /* !IN_RING3 */
755#endif /* PGMPOOL_WITH_MONITORING */
756
757
758
759#ifdef PGMPOOL_WITH_CACHE
760/**
761 * Inserts a page into the GCPhys hash table.
762 *
763 * @param pPool The pool.
764 * @param pPage The page.
765 */
766DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
767{
768 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
769 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
770 pPage->iNext = pPool->aiHash[iHash];
771 pPool->aiHash[iHash] = pPage->idx;
772}
773
774
775/**
776 * Removes a page from the GCPhys hash table.
777 *
778 * @param pPool The pool.
779 * @param pPage The page.
780 */
781DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
782{
783 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
784 if (pPool->aiHash[iHash] == pPage->idx)
785 pPool->aiHash[iHash] = pPage->iNext;
786 else
787 {
788 uint16_t iPrev = pPool->aiHash[iHash];
789 for (;;)
790 {
791 const int16_t i = pPool->aPages[iPrev].iNext;
792 if (i == pPage->idx)
793 {
794 pPool->aPages[iPrev].iNext = pPage->iNext;
795 break;
796 }
797 if (i == NIL_PGMPOOL_IDX)
798 {
799 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
800 break;
801 }
802 iPrev = i;
803 }
804 }
805 pPage->iNext = NIL_PGMPOOL_IDX;
806}
807
808
809/**
810 * Frees up one cache page.
811 *
812 * @returns VBox status code.
813 * @retval VINF_SUCCESS on success.
814 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
815 * @param pPool The pool.
816 * @param iUser The user index.
817 */
818static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
819{
820 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
821 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
822
823 /*
824 * Select one page from the tail of the age list.
825 */
826 uint16_t iToFree = pPool->iAgeTail;
827 if (iToFree == iUser)
828 iToFree = pPool->aPages[iToFree].iAgePrev;
829/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
830 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
831 {
832 uint16_t i = pPool->aPages[iToFree].iAgePrev;
833 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
834 {
835 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
836 continue;
837 iToFree = i;
838 break;
839 }
840 }
841*/
842 Assert(iToFree != iUser);
843 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
844
845 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
846 if (rc == VINF_SUCCESS)
847 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
848 return rc;
849}
850
851
852/**
853 * Checks if a kind mismatch is really a page being reused
854 * or if it's just normal remappings.
855 *
856 * @returns true if reused and the cached page (enmKind1) should be flushed
857 * @returns false if not reused.
858 * @param enmKind1 The kind of the cached page.
859 * @param enmKind2 The kind of the requested page.
860 */
861static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
862{
863 switch (enmKind1)
864 {
865 /*
866 * Never reuse them. There is no remapping in non-paging mode.
867 */
868 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
869 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
870 return true;
871
872 /*
873 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
874 */
875 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
876 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
877 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
878 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
879 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
880 switch (enmKind2)
881 {
882 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
883 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
884 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
885 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
886 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
887 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
888 return true;
889 default:
890 return false;
891 }
892
893 /*
894 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
895 */
896 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
897 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
898 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
899 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
900 switch (enmKind2)
901 {
902 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
904 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
905 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
906 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
907 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
908 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
909 return true;
910 default:
911 return false;
912 }
913
914 /*
915 * These cannot be flushed, and it's common to reuse the PDs as PTs.
916 */
917 case PGMPOOLKIND_ROOT_32BIT_PD:
918 case PGMPOOLKIND_ROOT_PAE_PD:
919 case PGMPOOLKIND_ROOT_PDPTR:
920 case PGMPOOLKIND_ROOT_PML4:
921 return false;
922
923 default:
924 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
925 }
926}
927
928
929/**
930 * Attempts to satisfy a pgmPoolAlloc request from the cache.
931 *
932 * @returns VBox status code.
933 * @retval VINF_PGM_CACHED_PAGE on success.
934 * @retval VERR_FILE_NOT_FOUND if not found.
935 * @param pPool The pool.
936 * @param GCPhys The GC physical address of the page we're gonna shadow.
937 * @param enmKind The kind of mapping.
938 * @param iUser The shadow page pool index of the user table.
939 * @param iUserTable The index into the user table (shadowed).
940 * @param ppPage Where to store the pointer to the page.
941 */
942static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
943{
944 /*
945 * Look up the GCPhys in the hash.
946 */
947 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
948 if (i != NIL_PGMPOOL_IDX)
949 {
950 do
951 {
952 PPGMPOOLPAGE pPage = &pPool->aPages[i];
953 if (pPage->GCPhys == GCPhys)
954 {
955 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
956 {
957 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
958 if (VBOX_SUCCESS(rc))
959 {
960 *ppPage = pPage;
961 STAM_COUNTER_INC(&pPool->StatCacheHits);
962 return VINF_PGM_CACHED_PAGE;
963 }
964 return rc;
965 }
966
967 /*
968 * The kind is different. In some cases we should now flush the page
969 * as it has been reused, but in most cases this is normal remapping
970 * of PDs as PT or big pages using the GCPhys field in a slightly
971 * different way than the other kinds.
972 */
973 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
974 {
975 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
976 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
977 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
978 break;
979 }
980 }
981
982 /* next */
983 i = pPage->iNext;
984 } while (i != NIL_PGMPOOL_IDX);
985 }
986
987 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
988 STAM_COUNTER_INC(&pPool->StatCacheMisses);
989 return VERR_FILE_NOT_FOUND;
990}
991
992
993/**
994 * Inserts a page into the cache.
995 *
996 * @param pPool The pool.
997 * @param pPage The cached page.
998 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
999 */
1000static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1001{
1002 /*
1003 * Insert into the GCPhys hash if the page is fit for that.
1004 */
1005 Assert(!pPage->fCached);
1006 if (fCanBeCached)
1007 {
1008 pPage->fCached = true;
1009 pgmPoolHashInsert(pPool, pPage);
1010 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1011 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1012 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1013 }
1014 else
1015 {
1016 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1017 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1018 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1019 }
1020
1021 /*
1022 * Insert at the head of the age list.
1023 */
1024 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1025 pPage->iAgeNext = pPool->iAgeHead;
1026 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1027 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1028 else
1029 pPool->iAgeTail = pPage->idx;
1030 pPool->iAgeHead = pPage->idx;
1031}
1032
1033
1034/**
1035 * Flushes a cached page.
1036 *
1037 * @param pPool The pool.
1038 * @param pPage The cached page.
1039 */
1040static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1041{
1042 /*
1043 * Remove the page from the hash.
1044 */
1045 if (pPage->fCached)
1046 {
1047 pPage->fCached = false;
1048 pgmPoolHashRemove(pPool, pPage);
1049 }
1050 else
1051 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1052
1053 /*
1054 * Remove it from the age list.
1055 */
1056 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1057 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1058 else
1059 pPool->iAgeTail = pPage->iAgePrev;
1060 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1061 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1062 else
1063 pPool->iAgeHead = pPage->iAgeNext;
1064 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1065 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1066}
1067#endif /* PGMPOOL_WITH_CACHE */
1068
1069
1070#ifdef PGMPOOL_WITH_MONITORING
1071/**
1072 * Looks for pages sharing the monitor.
1073 *
1074 * @returns Pointer to the head page.
1075 * @returns NULL if not found.
1076 * @param pPool The Pool
1077 * @param pNewPage The page which is going to be monitored.
1078 */
1079static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1080{
1081#ifdef PGMPOOL_WITH_CACHE
1082 /*
1083 * Look up the GCPhys in the hash.
1084 */
1085 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1086 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1087 if (i == NIL_PGMPOOL_IDX)
1088 return NULL;
1089 do
1090 {
1091 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1092 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1093 && pPage != pNewPage)
1094 {
1095 switch (pPage->enmKind)
1096 {
1097 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1098 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1099 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1100 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1101 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1102 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1103 case PGMPOOLKIND_ROOT_32BIT_PD:
1104 case PGMPOOLKIND_ROOT_PAE_PD:
1105 case PGMPOOLKIND_ROOT_PDPTR:
1106 case PGMPOOLKIND_ROOT_PML4:
1107 {
1108 /* find the head */
1109 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1110 {
1111 Assert(pPage->iMonitoredPrev != pPage->idx);
1112 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1113 }
1114 return pPage;
1115 }
1116
1117 /* ignore, no monitoring. */
1118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1119 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1121 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1122 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1123 break;
1124 default:
1125 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1126 }
1127 }
1128
1129 /* next */
1130 i = pPage->iNext;
1131 } while (i != NIL_PGMPOOL_IDX);
1132#endif
1133 return NULL;
1134}
1135
1136/**
1137 * Enabled write monitoring of a guest page.
1138 *
1139 * @returns VBox status code.
1140 * @retval VINF_SUCCESS on success.
1141 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1142 * @param pPool The pool.
1143 * @param pPage The cached page.
1144 */
1145static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1146{
1147 /*
1148 * Filter out the relevant kinds.
1149 */
1150 switch (pPage->enmKind)
1151 {
1152 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1153 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1154 break;
1155
1156 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1157 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1158 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1159 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1160 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1161 /* Nothing to monitor here. */
1162 return VINF_SUCCESS;
1163
1164 case PGMPOOLKIND_ROOT_32BIT_PD:
1165 case PGMPOOLKIND_ROOT_PAE_PD:
1166#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1167 break;
1168#endif
1169 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1170 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1171 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1172 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1173 case PGMPOOLKIND_ROOT_PDPTR:
1174 case PGMPOOLKIND_ROOT_PML4:
1175 default:
1176 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1177 }
1178
1179 /*
1180 * Install handler.
1181 */
1182 int rc;
1183 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1184 if (pPageHead)
1185 {
1186 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1187 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1188 pPage->iMonitoredPrev = pPageHead->idx;
1189 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1190 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1191 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1192 pPageHead->iMonitoredNext = pPage->idx;
1193 rc = VINF_SUCCESS;
1194 }
1195 else
1196 {
1197 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1198 PVM pVM = pPool->CTXSUFF(pVM);
1199 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1200 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1201 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1202 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1203 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1204 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
1205 pPool->pszAccessHandler);
1206 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1207 * the heap size should suffice. */
1208 AssertFatalRC(rc);
1209 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1210 rc = VERR_PGM_POOL_CLEARED;
1211 }
1212 pPage->fMonitored = true;
1213 return rc;
1214}
1215
1216
1217/**
1218 * Disables write monitoring of a guest page.
1219 *
1220 * @returns VBox status code.
1221 * @retval VINF_SUCCESS on success.
1222 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1223 * @param pPool The pool.
1224 * @param pPage The cached page.
1225 */
1226static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1227{
1228 /*
1229 * Filter out the relevant kinds.
1230 */
1231 switch (pPage->enmKind)
1232 {
1233 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1234 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1235 break;
1236
1237 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1238 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1239 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1240 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1241 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1242 /* Nothing to monitor here. */
1243 return VINF_SUCCESS;
1244
1245 case PGMPOOLKIND_ROOT_32BIT_PD:
1246 case PGMPOOLKIND_ROOT_PAE_PD:
1247#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1248 break;
1249#endif
1250 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1251 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1252 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1253 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1254 case PGMPOOLKIND_ROOT_PDPTR:
1255 case PGMPOOLKIND_ROOT_PML4:
1256 default:
1257 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1258 }
1259
1260 /*
1261 * Remove the page from the monitored list or uninstall it if last.
1262 */
1263 const PVM pVM = pPool->CTXSUFF(pVM);
1264 int rc;
1265 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1266 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1267 {
1268 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1269 {
1270 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1271 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1272 pNewHead->fCR3Mix = pPage->fCR3Mix;
1273 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1274 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1275 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1276 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
1277 pPool->pszAccessHandler);
1278 AssertFatalRCSuccess(rc);
1279 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1280 }
1281 else
1282 {
1283 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1284 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1285 {
1286 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1287 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1288 }
1289 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1290 rc = VINF_SUCCESS;
1291 }
1292 }
1293 else
1294 {
1295 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1296 AssertFatalRC(rc);
1297 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1298 rc = VERR_PGM_POOL_CLEARED;
1299 }
1300 pPage->fMonitored = false;
1301
1302 /*
1303 * Remove it from the list of modified pages (if in it).
1304 */
1305 pgmPoolMonitorModifiedRemove(pPool, pPage);
1306
1307 return rc;
1308}
1309
1310
1311#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1312/**
1313 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1314 *
1315 * @param pPool The Pool.
1316 * @param pPage A page in the chain.
1317 * @param fCR3Mix The new fCR3Mix value.
1318 */
1319static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1320{
1321 /* current */
1322 pPage->fCR3Mix = fCR3Mix;
1323
1324 /* before */
1325 int16_t idx = pPage->iMonitoredPrev;
1326 while (idx != NIL_PGMPOOL_IDX)
1327 {
1328 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1329 idx = pPool->aPages[idx].iMonitoredPrev;
1330 }
1331
1332 /* after */
1333 idx = pPage->iMonitoredNext;
1334 while (idx != NIL_PGMPOOL_IDX)
1335 {
1336 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1337 idx = pPool->aPages[idx].iMonitoredNext;
1338 }
1339}
1340
1341
1342/**
1343 * Installs or modifies monitoring of a CR3 page (special).
1344 *
1345 * We're pretending the CR3 page is shadowed by the pool so we can use the
1346 * generic mechanisms in detecting chained monitoring. (This also gives us a
1347 * tast of what code changes are required to really pool CR3 shadow pages.)
1348 *
1349 * @returns VBox status code.
1350 * @param pPool The pool.
1351 * @param idxRoot The CR3 (root) page index.
1352 * @param GCPhysCR3 The (new) CR3 value.
1353 */
1354int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1355{
1356 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1357 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1358 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1359 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1360
1361 /*
1362 * The unlikely case where it already matches.
1363 */
1364 if (pPage->GCPhys == GCPhysCR3)
1365 {
1366 Assert(pPage->fMonitored);
1367 return VINF_SUCCESS;
1368 }
1369
1370 /*
1371 * Flush the current monitoring and remove it from the hash.
1372 */
1373 int rc = VINF_SUCCESS;
1374 if (pPage->fMonitored)
1375 {
1376 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1377 rc = pgmPoolMonitorFlush(pPool, pPage);
1378 if (rc == VERR_PGM_POOL_CLEARED)
1379 rc = VINF_SUCCESS;
1380 else
1381 AssertFatalRC(rc);
1382 pgmPoolHashRemove(pPool, pPage);
1383 }
1384
1385 /*
1386 * Monitor the page at the new location and insert it into the hash.
1387 */
1388 pPage->GCPhys = GCPhysCR3;
1389 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1390 if (rc2 != VERR_PGM_POOL_CLEARED)
1391 {
1392 AssertFatalRC(rc2);
1393 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1394 rc = rc2;
1395 }
1396 pgmPoolHashInsert(pPool, pPage);
1397 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1398 return rc;
1399}
1400
1401
1402/**
1403 * Removes the monitoring of a CR3 page (special).
1404 *
1405 * @returns VBox status code.
1406 * @param pPool The pool.
1407 * @param idxRoot The CR3 (root) page index.
1408 */
1409int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1410{
1411 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1412 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1413 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1414 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1415
1416 if (!pPage->fMonitored)
1417 return VINF_SUCCESS;
1418
1419 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1420 int rc = pgmPoolMonitorFlush(pPool, pPage);
1421 if (rc != VERR_PGM_POOL_CLEARED)
1422 AssertFatalRC(rc);
1423 else
1424 rc = VINF_SUCCESS;
1425 pgmPoolHashRemove(pPool, pPage);
1426 Assert(!pPage->fMonitored);
1427 pPage->GCPhys = NIL_RTGCPHYS;
1428 return rc;
1429}
1430#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1431
1432
1433/**
1434 * Inserts the page into the list of modified pages.
1435 *
1436 * @param pPool The pool.
1437 * @param pPage The page.
1438 */
1439void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1440{
1441 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1442 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1443 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1444 && pPool->iModifiedHead != pPage->idx,
1445 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1446 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1447 pPool->iModifiedHead, pPool->cModifiedPages));
1448
1449 pPage->iModifiedNext = pPool->iModifiedHead;
1450 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1451 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1452 pPool->iModifiedHead = pPage->idx;
1453 pPool->cModifiedPages++;
1454#ifdef VBOX_WITH_STATISTICS
1455 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1456 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1457#endif
1458}
1459
1460
1461/**
1462 * Removes the page from the list of modified pages and resets the
1463 * moficiation counter.
1464 *
1465 * @param pPool The pool.
1466 * @param pPage The page which is believed to be in the list of modified pages.
1467 */
1468static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1469{
1470 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1471 if (pPool->iModifiedHead == pPage->idx)
1472 {
1473 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1474 pPool->iModifiedHead = pPage->iModifiedNext;
1475 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1476 {
1477 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1478 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1479 }
1480 pPool->cModifiedPages--;
1481 }
1482 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1483 {
1484 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1485 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1486 {
1487 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1488 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1489 }
1490 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1491 pPool->cModifiedPages--;
1492 }
1493 else
1494 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1495 pPage->cModifications = 0;
1496}
1497
1498
1499/**
1500 * Zaps the list of modified pages, resetting their modification counters in the process.
1501 *
1502 * @param pVM The VM handle.
1503 */
1504void pgmPoolMonitorModifiedClearAll(PVM pVM)
1505{
1506 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1507 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1508
1509 unsigned cPages = 0; NOREF(cPages);
1510 uint16_t idx = pPool->iModifiedHead;
1511 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1512 while (idx != NIL_PGMPOOL_IDX)
1513 {
1514 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1515 idx = pPage->iModifiedNext;
1516 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1517 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1518 pPage->cModifications = 0;
1519 Assert(++cPages);
1520 }
1521 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1522 pPool->cModifiedPages = 0;
1523}
1524
1525
1526/**
1527 * Clear all shadow pages and clear all modification counters.
1528 *
1529 * @param pVM The VM handle.
1530 * @remark Should only be used when monitoring is available, thus placed in
1531 * the PGMPOOL_WITH_MONITORING #ifdef.
1532 */
1533void pgmPoolClearAll(PVM pVM)
1534{
1535 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1536 STAM_PROFILE_START(&pPool->StatClearAll, c);
1537 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1538
1539 /*
1540 * Iterate all the pages until we've encountered all that in use.
1541 * This is simple but not quite optimal solution.
1542 */
1543 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1544 unsigned cLeft = pPool->cUsedPages;
1545 unsigned iPage = pPool->cCurPages;
1546 while (--iPage >= PGMPOOL_IDX_FIRST)
1547 {
1548 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1549 if (pPage->GCPhys != NIL_RTGCPHYS)
1550 {
1551 switch (pPage->enmKind)
1552 {
1553 /*
1554 * We only care about shadow page tables.
1555 */
1556 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1558 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1560 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1561 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1562 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1563 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1564 {
1565#ifdef PGMPOOL_WITH_USER_TRACKING
1566 if (pPage->cPresent)
1567#endif
1568 {
1569 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1570 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1571 ASMMemZeroPage(pvShw);
1572 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1573#ifdef PGMPOOL_WITH_USER_TRACKING
1574 pPage->cPresent = 0;
1575 pPage->iFirstPresent = ~0;
1576#endif
1577 }
1578 }
1579 /* fall thru */
1580
1581 default:
1582 Assert(!pPage->cModifications || ++cModifiedPages);
1583 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1584 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1585 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1586 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1587 pPage->cModifications = 0;
1588 break;
1589
1590 }
1591 if (!--cLeft)
1592 break;
1593 }
1594 }
1595
1596 /* swipe the special pages too. */
1597 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1598 {
1599 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1600 if (pPage->GCPhys != NIL_RTGCPHYS)
1601 {
1602 Assert(!pPage->cModifications || ++cModifiedPages);
1603 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1604 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1605 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1606 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1607 pPage->cModifications = 0;
1608 }
1609 }
1610
1611 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1612 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1613 pPool->cModifiedPages = 0;
1614
1615#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1616 /*
1617 * Clear all the GCPhys links and rebuild the phys ext free list.
1618 */
1619 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1620 pRam;
1621 pRam = pRam->CTXSUFF(pNext))
1622 {
1623 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1624 while (iPage-- > 0)
1625 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1626 }
1627
1628 pPool->iPhysExtFreeHead = 0;
1629 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1630 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1631 for (unsigned i = 0; i < cMaxPhysExts; i++)
1632 {
1633 paPhysExts[i].iNext = i + 1;
1634 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1635 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1636 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1637 }
1638 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1639#endif
1640
1641
1642 pPool->cPresent = 0;
1643 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1644}
1645#endif /* PGMPOOL_WITH_MONITORING */
1646
1647
1648#ifdef PGMPOOL_WITH_USER_TRACKING
1649/**
1650 * Frees up at least one user entry.
1651 *
1652 * @returns VBox status code.
1653 * @retval VINF_SUCCESS if successfully added.
1654 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1655 * @param pPool The pool.
1656 * @param iUser The user index.
1657 */
1658static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1659{
1660 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1661#ifdef PGMPOOL_WITH_CACHE
1662 /*
1663 * Just free cached pages in a braindead fashion.
1664 */
1665 /** @todo walk the age list backwards and free the first with usage. */
1666 int rc = VINF_SUCCESS;
1667 do
1668 {
1669 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1670 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1671 rc = rc2;
1672 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1673 return rc;
1674#else
1675 /*
1676 * Lazy approach.
1677 */
1678 pgmPoolFlushAllInt(pPool);
1679 return VERR_PGM_POOL_FLUSHED;
1680#endif
1681}
1682
1683
1684/**
1685 * Inserts a page into the cache.
1686 *
1687 * This will create user node for the page, insert it into the GCPhys
1688 * hash, and insert it into the age list.
1689 *
1690 * @returns VBox status code.
1691 * @retval VINF_SUCCESS if successfully added.
1692 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1693 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1694 * @param pPool The pool.
1695 * @param pPage The cached page.
1696 * @param GCPhys The GC physical address of the page we're gonna shadow.
1697 * @param iUser The user index.
1698 * @param iUserTable The user table index.
1699 */
1700DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1701{
1702 int rc = VINF_SUCCESS;
1703 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1704
1705 /*
1706 * Find free a user node.
1707 */
1708 uint16_t i = pPool->iUserFreeHead;
1709 if (i == NIL_PGMPOOL_USER_INDEX)
1710 {
1711 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1712 if (VBOX_FAILURE(rc))
1713 return rc;
1714 i = pPool->iUserFreeHead;
1715 }
1716
1717 /*
1718 * Unlink the user node from the free list,
1719 * initialize and insert it into the user list.
1720 */
1721 pPool->iUserFreeHead = pUser[i].iNext;
1722 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1723 pUser[i].iUser = iUser;
1724 pUser[i].iUserTable = iUserTable;
1725 pPage->iUserHead = i;
1726
1727 /*
1728 * Insert into cache and enable monitoring of the guest page if enabled.
1729 *
1730 * Until we implement caching of all levels, including the CR3 one, we'll
1731 * have to make sure we don't try monitor & cache any recursive reuse of
1732 * a monitored CR3 page. Because all windows versions are doing this we'll
1733 * have to be able to do combined access monitoring, CR3 + PT and
1734 * PD + PT (guest PAE).
1735 *
1736 * Update:
1737 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1738 */
1739#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1740# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1741 const bool fCanBeMonitored = true;
1742# else
1743 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1744 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1745 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1746# endif
1747# ifdef PGMPOOL_WITH_CACHE
1748 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1749# endif
1750 if (fCanBeMonitored)
1751 {
1752# ifdef PGMPOOL_WITH_MONITORING
1753 rc = pgmPoolMonitorInsert(pPool, pPage);
1754 if (rc == VERR_PGM_POOL_CLEARED)
1755 {
1756 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1757# ifndef PGMPOOL_WITH_CACHE
1758 pgmPoolMonitorFlush(pPool, pPage);
1759 rc = VERR_PGM_POOL_FLUSHED;
1760# endif
1761 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1762 pUser[i].iNext = pPool->iUserFreeHead;
1763 pUser[i].iUser = NIL_PGMPOOL_IDX;
1764 pPool->iUserFreeHead = i;
1765 }
1766 }
1767# endif
1768#endif /* PGMPOOL_WITH_MONITORING */
1769 return rc;
1770}
1771
1772
1773# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1774/**
1775 * Adds a user reference to a page.
1776 *
1777 * This will
1778 * This will move the page to the head of the
1779 *
1780 * @returns VBox status code.
1781 * @retval VINF_SUCCESS if successfully added.
1782 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1783 * @param pPool The pool.
1784 * @param pPage The cached page.
1785 * @param iUser The user index.
1786 * @param iUserTable The user table.
1787 */
1788static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1789{
1790 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1791
1792# ifdef VBOX_STRICT
1793 /*
1794 * Check that the entry doesn't already exists.
1795 */
1796 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1797 {
1798 uint16_t i = pPage->iUserHead;
1799 do
1800 {
1801 Assert(i < pPool->cMaxUsers);
1802 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1803 i = paUsers[i].iNext;
1804 } while (i != NIL_PGMPOOL_USER_INDEX);
1805 }
1806# endif
1807
1808 /*
1809 * Allocate a user node.
1810 */
1811 uint16_t i = pPool->iUserFreeHead;
1812 if (i == NIL_PGMPOOL_USER_INDEX)
1813 {
1814 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1815 if (VBOX_FAILURE(rc))
1816 return rc;
1817 i = pPool->iUserFreeHead;
1818 }
1819 pPool->iUserFreeHead = paUsers[i].iNext;
1820
1821 /*
1822 * Initialize the user node and insert it.
1823 */
1824 paUsers[i].iNext = pPage->iUserHead;
1825 paUsers[i].iUser = iUser;
1826 paUsers[i].iUserTable = iUserTable;
1827 pPage->iUserHead = i;
1828
1829# ifdef PGMPOOL_WITH_CACHE
1830 /*
1831 * Tell the cache to update its replacement stats for this page.
1832 */
1833 pgmPoolCacheUsed(pPool, pPage);
1834# endif
1835 return VINF_SUCCESS;
1836}
1837# endif /* PGMPOOL_WITH_CACHE */
1838
1839
1840/**
1841 * Frees a user record associated with a page.
1842 *
1843 * This does not clear the entry in the user table, it simply replaces the
1844 * user record to the chain of free records.
1845 *
1846 * @param pPool The pool.
1847 * @param HCPhys The HC physical address of the shadow page.
1848 * @param iUser The shadow page pool index of the user table.
1849 * @param iUserTable The index into the user table (shadowed).
1850 */
1851static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1852{
1853 /*
1854 * Unlink and free the specified user entry.
1855 */
1856 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1857
1858 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1859 uint16_t i = pPage->iUserHead;
1860 if ( i != NIL_PGMPOOL_USER_INDEX
1861 && paUsers[i].iUser == iUser
1862 && paUsers[i].iUserTable == iUserTable)
1863 {
1864 pPage->iUserHead = paUsers[i].iNext;
1865
1866 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1867 paUsers[i].iNext = pPool->iUserFreeHead;
1868 pPool->iUserFreeHead = i;
1869 return;
1870 }
1871
1872 /* General: Linear search. */
1873 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1874 while (i != NIL_PGMPOOL_USER_INDEX)
1875 {
1876 if ( paUsers[i].iUser == iUser
1877 && paUsers[i].iUserTable == iUserTable)
1878 {
1879 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1880 paUsers[iPrev].iNext = paUsers[i].iNext;
1881 else
1882 pPage->iUserHead = paUsers[i].iNext;
1883
1884 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1885 paUsers[i].iNext = pPool->iUserFreeHead;
1886 pPool->iUserFreeHead = i;
1887 return;
1888 }
1889 iPrev = i;
1890 i = paUsers[i].iNext;
1891 }
1892
1893 /* Fatal: didn't find it */
1894 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1895 iUser, iUserTable, pPage->GCPhys));
1896}
1897
1898
1899/**
1900 * Gets the entry size of a shadow table.
1901 *
1902 * @param enmKind
1903 * The kind of page.
1904 *
1905 * @returns The size of the entry in bytes. That is, 4 or 8.
1906 * @returns If the kind is not for a table, an assertion is raised and 0 is
1907 * returned.
1908 */
1909DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1910{
1911 switch (enmKind)
1912 {
1913 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1914 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1915 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1916 case PGMPOOLKIND_ROOT_32BIT_PD:
1917 return 4;
1918
1919 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1920 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1921 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1922 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1923 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1924 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1925 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1926 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1927 case PGMPOOLKIND_ROOT_PAE_PD:
1928 case PGMPOOLKIND_ROOT_PDPTR:
1929 case PGMPOOLKIND_ROOT_PML4:
1930 return 8;
1931
1932 default:
1933 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1934 }
1935}
1936
1937
1938/**
1939 * Gets the entry size of a guest table.
1940 *
1941 * @param enmKind
1942 * The kind of page.
1943 *
1944 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1945 * @returns If the kind is not for a table, an assertion is raised and 0 is
1946 * returned.
1947 */
1948DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1949{
1950 switch (enmKind)
1951 {
1952 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1953 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1954 case PGMPOOLKIND_ROOT_32BIT_PD:
1955 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1956 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1957 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1958 return 4;
1959
1960 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1961 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1962 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1963 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1964 case PGMPOOLKIND_ROOT_PAE_PD:
1965 case PGMPOOLKIND_ROOT_PDPTR:
1966 case PGMPOOLKIND_ROOT_PML4:
1967 return 8;
1968
1969 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1970 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1971 /** @todo can we return 0? (nobody is calling this...) */
1972 return 0;
1973
1974 default:
1975 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1976 }
1977}
1978
1979
1980#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1981/**
1982 * Scans one shadow page table for mappings of a physical page.
1983 *
1984 * @param pVM The VM handle.
1985 * @param pPhysPage The guest page in question.
1986 * @param iShw The shadow page table.
1987 * @param cRefs The number of references made in that PT.
1988 */
1989static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
1990{
1991 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
1992 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1993
1994 /*
1995 * Assert sanity.
1996 */
1997 Assert(cRefs == 1);
1998 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
1999 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2000
2001 /*
2002 * Then, clear the actual mappings to the page in the shadow PT.
2003 */
2004 switch (pPage->enmKind)
2005 {
2006 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2007 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2008 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2009 {
2010 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2011 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2012 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2013 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2014 {
2015 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2016 pPT->a[i].u = 0;
2017 cRefs--;
2018 if (!cRefs)
2019 return;
2020 }
2021#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2022 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2023 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2024 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2025 {
2026 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2027 pPT->a[i].u = 0;
2028 }
2029#endif
2030 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2031 break;
2032 }
2033
2034 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2035 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2036 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2037 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2038 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2039 {
2040 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2041 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2042 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2043 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2044 {
2045 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2046 pPT->a[i].u = 0;
2047 cRefs--;
2048 if (!cRefs)
2049 return;
2050 }
2051#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2052 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2053 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2054 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2055 {
2056 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2057 pPT->a[i].u = 0;
2058 }
2059#endif
2060 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2061 break;
2062 }
2063
2064 default:
2065 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2066 }
2067}
2068
2069
2070/**
2071 * Scans one shadow page table for mappings of a physical page.
2072 *
2073 * @param pVM The VM handle.
2074 * @param pPhysPage The guest page in question.
2075 * @param iShw The shadow page table.
2076 * @param cRefs The number of references made in that PT.
2077 */
2078void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2079{
2080 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2081 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2082 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2083 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2084 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2085 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2086}
2087
2088
2089/**
2090 * Flushes a list of shadow page tables mapping the same physical page.
2091 *
2092 * @param pVM The VM handle.
2093 * @param pPhysPage The guest page in question.
2094 * @param iPhysExt The physical cross reference extent list to flush.
2095 */
2096void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2097{
2098 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2099 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2100 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2101
2102 const uint16_t iPhysExtStart = iPhysExt;
2103 PPGMPOOLPHYSEXT pPhysExt;
2104 do
2105 {
2106 Assert(iPhysExt < pPool->cMaxPhysExts);
2107 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2108 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2109 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2110 {
2111 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2112 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2113 }
2114
2115 /* next */
2116 iPhysExt = pPhysExt->iNext;
2117 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2118
2119 /* insert the list into the free list and clear the ram range entry. */
2120 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2121 pPool->iPhysExtFreeHead = iPhysExtStart;
2122 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2123
2124 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2125}
2126#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2127
2128
2129/**
2130 * Scans all shadow page tables for mappings of a physical page.
2131 *
2132 * This may be slow, but it's most likely more efficient than cleaning
2133 * out the entire page pool / cache.
2134 *
2135 * @returns VBox status code.
2136 * @retval VINF_SUCCESS if all references has been successfully cleared.
2137 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2138 * a page pool cleaning.
2139 *
2140 * @param pVM The VM handle.
2141 * @param pPhysPage The guest page in question.
2142 */
2143int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2144{
2145 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2146 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2147 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2148 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2149
2150#if 1
2151 /*
2152 * There is a limit to what makes sense.
2153 */
2154 if (pPool->cPresent > 1024)
2155 {
2156 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2157 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2158 return VINF_PGM_GCPHYS_ALIASED;
2159 }
2160#endif
2161
2162 /*
2163 * Iterate all the pages until we've encountered all that in use.
2164 * This is simple but not quite optimal solution.
2165 */
2166 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2167 const uint32_t u32 = u64;
2168 unsigned cLeft = pPool->cUsedPages;
2169 unsigned iPage = pPool->cCurPages;
2170 while (--iPage >= PGMPOOL_IDX_FIRST)
2171 {
2172 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2173 if (pPage->GCPhys != NIL_RTGCPHYS)
2174 {
2175 switch (pPage->enmKind)
2176 {
2177 /*
2178 * We only care about shadow page tables.
2179 */
2180 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2181 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2182 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2183 {
2184 unsigned cPresent = pPage->cPresent;
2185 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2186 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2187 if (pPT->a[i].n.u1Present)
2188 {
2189 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2190 {
2191 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2192 pPT->a[i].u = 0;
2193 }
2194 if (!--cPresent)
2195 break;
2196 }
2197 break;
2198 }
2199
2200 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2201 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2202 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2203 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2204 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2205 {
2206 unsigned cPresent = pPage->cPresent;
2207 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2208 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2209 if (pPT->a[i].n.u1Present)
2210 {
2211 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2212 {
2213 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2214 pPT->a[i].u = 0;
2215 }
2216 if (!--cPresent)
2217 break;
2218 }
2219 break;
2220 }
2221 }
2222 if (!--cLeft)
2223 break;
2224 }
2225 }
2226
2227 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2228 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2229 return VINF_SUCCESS;
2230}
2231
2232
2233/**
2234 * Clears the user entry in a user table.
2235 *
2236 * This is used to remove all references to a page when flushing it.
2237 */
2238static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2239{
2240 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2241 Assert(pUser->iUser < pPool->cCurPages);
2242
2243 /*
2244 * Map the user page.
2245 */
2246 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2247 union
2248 {
2249 uint64_t *pau64;
2250 uint32_t *pau32;
2251 } u;
2252 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2253
2254#ifdef VBOX_STRICT
2255 /*
2256 * Some sanity checks.
2257 */
2258 switch (pUserPage->enmKind)
2259 {
2260 case PGMPOOLKIND_ROOT_32BIT_PD:
2261 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2262 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2263 break;
2264 case PGMPOOLKIND_ROOT_PAE_PD:
2265 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2266 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2267 break;
2268 case PGMPOOLKIND_ROOT_PDPTR:
2269 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2270 Assert(pUser->iUserTable < 4);
2271 break;
2272 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2273 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2274 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2275 break;
2276 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2277 case PGMPOOLKIND_ROOT_PML4:
2278 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2279 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2280 break;
2281 default:
2282 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2283 break;
2284 }
2285#endif /* VBOX_STRICT */
2286
2287 /*
2288 * Clear the entry in the user page.
2289 */
2290 switch (pUserPage->enmKind)
2291 {
2292 /* 32-bit entries */
2293 case PGMPOOLKIND_ROOT_32BIT_PD:
2294 u.pau32[pUser->iUserTable] = 0;
2295 break;
2296
2297 /* 64-bit entries */
2298 case PGMPOOLKIND_ROOT_PAE_PD:
2299 case PGMPOOLKIND_ROOT_PDPTR:
2300 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2301 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2302 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2303 case PGMPOOLKIND_ROOT_PML4:
2304 u.pau64[pUser->iUserTable] = 0;
2305 break;
2306
2307 default:
2308 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2309 }
2310}
2311
2312
2313/**
2314 * Clears all users of a page.
2315 */
2316static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2317{
2318 /*
2319 * Free all the user records.
2320 */
2321 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2322 uint16_t i = pPage->iUserHead;
2323 while (i != NIL_PGMPOOL_USER_INDEX)
2324 {
2325 /* Clear enter in user table. */
2326 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2327
2328 /* Free it. */
2329 const uint16_t iNext = paUsers[i].iNext;
2330 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2331 paUsers[i].iNext = pPool->iUserFreeHead;
2332 pPool->iUserFreeHead = i;
2333
2334 /* Next. */
2335 i = iNext;
2336 }
2337 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2338}
2339
2340
2341#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2342/**
2343 * Allocates a new physical cross reference extent.
2344 *
2345 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2346 * @param pVM The VM handle.
2347 * @param piPhysExt Where to store the phys ext index.
2348 */
2349PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2350{
2351 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2352 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2353 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2354 {
2355 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2356 return NULL;
2357 }
2358 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2359 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2360 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2361 *piPhysExt = iPhysExt;
2362 return pPhysExt;
2363}
2364
2365
2366/**
2367 * Frees a physical cross reference extent.
2368 *
2369 * @param pVM The VM handle.
2370 * @param iPhysExt The extent to free.
2371 */
2372void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2373{
2374 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2375 Assert(iPhysExt < pPool->cMaxPhysExts);
2376 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2377 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2378 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2379 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2380 pPool->iPhysExtFreeHead = iPhysExt;
2381}
2382
2383
2384/**
2385 * Frees a physical cross reference extent.
2386 *
2387 * @param pVM The VM handle.
2388 * @param iPhysExt The extent to free.
2389 */
2390void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2391{
2392 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2393
2394 const uint16_t iPhysExtStart = iPhysExt;
2395 PPGMPOOLPHYSEXT pPhysExt;
2396 do
2397 {
2398 Assert(iPhysExt < pPool->cMaxPhysExts);
2399 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2400 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2401 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2402
2403 /* next */
2404 iPhysExt = pPhysExt->iNext;
2405 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2406
2407 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2408 pPool->iPhysExtFreeHead = iPhysExtStart;
2409}
2410
2411/**
2412 * Insert a reference into a list of physical cross reference extents.
2413 *
2414 * @returns The new ram range flags (top 16-bits).
2415 *
2416 * @param pVM The VM handle.
2417 * @param iPhysExt The physical extent index of the list head.
2418 * @param iShwPT The shadow page table index.
2419 *
2420 */
2421static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2422{
2423 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2424 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2425
2426 /* special common case. */
2427 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2428 {
2429 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2430 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2431 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2432 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2433 }
2434
2435 /* general treatment. */
2436 const uint16_t iPhysExtStart = iPhysExt;
2437 unsigned cMax = 15;
2438 for (;;)
2439 {
2440 Assert(iPhysExt < pPool->cMaxPhysExts);
2441 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2442 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2443 {
2444 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2445 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2446 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2447 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2448 }
2449 if (!--cMax)
2450 {
2451 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2452 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2453 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2454 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2455 }
2456 }
2457
2458 /* add another extent to the list. */
2459 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2460 if (!pNew)
2461 {
2462 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2463 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2464 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2465 }
2466 pNew->iNext = iPhysExtStart;
2467 pNew->aidx[0] = iShwPT;
2468 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2469 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2470}
2471
2472
2473/**
2474 * Add a reference to guest physical page where extents are in use.
2475 *
2476 * @returns The new ram range flags (top 16-bits).
2477 *
2478 * @param pVM The VM handle.
2479 * @param u16 The ram range flags (top 16-bits).
2480 * @param iShwPT The shadow page table index.
2481 */
2482uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2483{
2484 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2485 {
2486 /*
2487 * Convert to extent list.
2488 */
2489 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2490 uint16_t iPhysExt;
2491 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2492 if (pPhysExt)
2493 {
2494 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2495 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2496 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2497 pPhysExt->aidx[1] = iShwPT;
2498 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2499 }
2500 else
2501 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2502 }
2503 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2504 {
2505 /*
2506 * Insert into the extent list.
2507 */
2508 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2509 }
2510 else
2511 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2512 return u16;
2513}
2514
2515
2516/**
2517 * Clear references to guest physical memory.
2518 *
2519 * @param pPool The pool.
2520 * @param pPage The page.
2521 * @param pPhysPage Pointer to the aPages entry in the ram range.
2522 */
2523void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2524{
2525 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2526 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2527
2528 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2529 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2530 {
2531 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2532 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2533 do
2534 {
2535 Assert(iPhysExt < pPool->cMaxPhysExts);
2536
2537 /*
2538 * Look for the shadow page and check if it's all freed.
2539 */
2540 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2541 {
2542 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2543 {
2544 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2545
2546 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2547 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2548 {
2549 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2550 return;
2551 }
2552
2553 /* we can free the node. */
2554 PVM pVM = pPool->CTXSUFF(pVM);
2555 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2556 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2557 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2558 {
2559 /* lonely node */
2560 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2561 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2562 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2563 }
2564 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2565 {
2566 /* head */
2567 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2568 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2569 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2570 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2571 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2572 }
2573 else
2574 {
2575 /* in list */
2576 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2577 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2578 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2579 }
2580 iPhysExt = iPhysExtNext;
2581 return;
2582 }
2583 }
2584
2585 /* next */
2586 iPhysExtPrev = iPhysExt;
2587 iPhysExt = paPhysExts[iPhysExt].iNext;
2588 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2589
2590 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2591 }
2592 else /* nothing to do */
2593 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2594}
2595
2596
2597
2598/**
2599 * Clear references to guest physical memory.
2600 *
2601 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2602 * is assumed to be correct, so the linear search can be skipped and we can assert
2603 * at an earlier point.
2604 *
2605 * @param pPool The pool.
2606 * @param pPage The page.
2607 * @param HCPhys The host physical address corresponding to the guest page.
2608 * @param GCPhys The guest physical address corresponding to HCPhys.
2609 */
2610static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2611{
2612 /*
2613 * Walk range list.
2614 */
2615 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2616 while (pRam)
2617 {
2618 RTGCPHYS off = GCPhys - pRam->GCPhys;
2619 if (off < pRam->cb)
2620 {
2621 /* does it match? */
2622 const unsigned iPage = off >> PAGE_SHIFT;
2623 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2624 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2625 {
2626 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2627 return;
2628 }
2629 break;
2630 }
2631 pRam = CTXSUFF(pRam->pNext);
2632 }
2633 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2634}
2635
2636
2637/**
2638 * Clear references to guest physical memory.
2639 *
2640 * @param pPool The pool.
2641 * @param pPage The page.
2642 * @param HCPhys The host physical address corresponding to the guest page.
2643 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
2644 */
2645static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2646{
2647 /*
2648 * Walk range list.
2649 */
2650 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2651 while (pRam)
2652 {
2653 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2654 if (off < pRam->cb)
2655 {
2656 /* does it match? */
2657 const unsigned iPage = off >> PAGE_SHIFT;
2658 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2659 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2660 {
2661 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2662 return;
2663 }
2664 break;
2665 }
2666 pRam = CTXSUFF(pRam->pNext);
2667 }
2668
2669 /*
2670 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2671 */
2672 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2673 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2674 while (pRam)
2675 {
2676 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2677 while (iPage-- > 0)
2678 {
2679 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2680 {
2681 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2682 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2683 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2684 return;
2685 }
2686 }
2687 pRam = CTXSUFF(pRam->pNext);
2688 }
2689
2690 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2691}
2692
2693
2694/**
2695 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2696 *
2697 * @param pPool The pool.
2698 * @param pPage The page.
2699 * @param pShwPT The shadow page table (mapping of the page).
2700 * @param pGstPT The guest page table.
2701 */
2702DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2703{
2704 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2705 if (pShwPT->a[i].n.u1Present)
2706 {
2707 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2708 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2709 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2710 if (!--pPage->cPresent)
2711 break;
2712 }
2713}
2714
2715
2716/**
2717 * Clear references to guest physical memory in a PAE / 32-bit page table.
2718 *
2719 * @param pPool The pool.
2720 * @param pPage The page.
2721 * @param pShwPT The shadow page table (mapping of the page).
2722 * @param pGstPT The guest page table (just a half one).
2723 */
2724DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2725{
2726 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2727 if (pShwPT->a[i].n.u1Present)
2728 {
2729 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2730 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2731 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2732 }
2733}
2734
2735
2736/**
2737 * Clear references to guest physical memory in a PAE / PAE page table.
2738 *
2739 * @param pPool The pool.
2740 * @param pPage The page.
2741 * @param pShwPT The shadow page table (mapping of the page).
2742 * @param pGstPT The guest page table.
2743 */
2744DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2745{
2746 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2747 if (pShwPT->a[i].n.u1Present)
2748 {
2749 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
2750 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2751 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2752 }
2753}
2754
2755
2756/**
2757 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2758 *
2759 * @param pPool The pool.
2760 * @param pPage The page.
2761 * @param pShwPT The shadow page table (mapping of the page).
2762 */
2763DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2764{
2765 RTGCPHYS GCPhys = pPage->GCPhys;
2766 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2767 if (pShwPT->a[i].n.u1Present)
2768 {
2769 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2770 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2771 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2772 }
2773}
2774
2775
2776/**
2777 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2778 *
2779 * @param pPool The pool.
2780 * @param pPage The page.
2781 * @param pShwPT The shadow page table (mapping of the page).
2782 */
2783DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2784{
2785 RTGCPHYS GCPhys = pPage->GCPhys;
2786 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2787 if (pShwPT->a[i].n.u1Present)
2788 {
2789 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2790 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2791 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2792 }
2793}
2794#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2795
2796
2797/**
2798 * Clear references to shadowed pages in a PAE page directory.
2799 *
2800 * @param pPool The pool.
2801 * @param pPage The page.
2802 * @param pShwPD The shadow page directory (mapping of the page).
2803 */
2804DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2805{
2806 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2807 {
2808 if (pShwPD->a[i].n.u1Present)
2809 {
2810 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2811 if (pSubPage)
2812 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2813 else
2814 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2815 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2816 }
2817 }
2818}
2819
2820
2821/**
2822 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2823 *
2824 * @param pPool The pool.
2825 * @param pPage The page.
2826 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2827 */
2828DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2829{
2830 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2831 {
2832 if (pShwPdPtr->a[i].n.u1Present)
2833 {
2834 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2835 if (pSubPage)
2836 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2837 else
2838 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2839 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2840 }
2841 }
2842}
2843
2844
2845/**
2846 * Clears all references made by this page.
2847 *
2848 * This includes other shadow pages and GC physical addresses.
2849 *
2850 * @param pPool The pool.
2851 * @param pPage The page.
2852 */
2853static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2854{
2855 /*
2856 * Map the shadow page and take action according to the page kind.
2857 */
2858 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2859 switch (pPage->enmKind)
2860 {
2861#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2862 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2863 {
2864 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2865 void *pvGst;
2866 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2867 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2868 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2869 break;
2870 }
2871
2872 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2873 {
2874 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2875 void *pvGst;
2876 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2877 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2878 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2879 break;
2880 }
2881
2882 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2883 {
2884 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2885 void *pvGst;
2886 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2887 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2888 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2889 break;
2890 }
2891
2892 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2893 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2894 {
2895 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2896 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2897 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2898 break;
2899 }
2900
2901 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2902 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2903 {
2904 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2905 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2906 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2907 break;
2908 }
2909
2910#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2911 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2912 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2913 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2914 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2915 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2916 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2917 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2918 break;
2919#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2920
2921 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2922 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2923 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2924 break;
2925
2926 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2927 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2928 break;
2929
2930 default:
2931 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2932 }
2933
2934 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
2935 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2936 ASMMemZeroPage(pvShw);
2937 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2938 pPage->fZeroed = true;
2939}
2940#endif /* PGMPOOL_WITH_USER_TRACKING */
2941
2942
2943/**
2944 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2945 *
2946 * @param pPool The pool.
2947 */
2948static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2949{
2950 /*
2951 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2952 */
2953 Assert(NIL_PGMPOOL_IDX == 0);
2954 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2955 {
2956 /*
2957 * Get the page address.
2958 */
2959 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2960 union
2961 {
2962 uint64_t *pau64;
2963 uint32_t *pau32;
2964 } u;
2965 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2966
2967 /*
2968 * Mark stuff not present.
2969 */
2970 switch (pPage->enmKind)
2971 {
2972 case PGMPOOLKIND_ROOT_32BIT_PD:
2973 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2974 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2975 u.pau32[iPage] = 0;
2976 break;
2977
2978 case PGMPOOLKIND_ROOT_PAE_PD:
2979 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2980 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2981 u.pau64[iPage] = 0;
2982 break;
2983
2984 case PGMPOOLKIND_ROOT_PML4:
2985 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2986 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2987 u.pau64[iPage] = 0;
2988 break;
2989
2990 case PGMPOOLKIND_ROOT_PDPTR:
2991 /* Not root of shadowed pages currently, ignore it. */
2992 break;
2993 }
2994 }
2995
2996 /*
2997 * Paranoia (to be removed), flag a global CR3 sync.
2998 */
2999 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3000}
3001
3002
3003/**
3004 * Flushes the entire cache.
3005 *
3006 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3007 * and execute this CR3 flush.
3008 *
3009 * @param pPool The pool.
3010 */
3011static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3012{
3013 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3014 LogFlow(("pgmPoolFlushAllInt:\n"));
3015
3016 /*
3017 * If there are no pages in the pool, there is nothing to do.
3018 */
3019 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3020 {
3021 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3022 return;
3023 }
3024
3025 /*
3026 * Nuke the free list and reinsert all pages into it.
3027 */
3028 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3029 {
3030 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3031
3032#ifdef IN_RING3
3033 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3034#endif
3035#ifdef PGMPOOL_WITH_MONITORING
3036 if (pPage->fMonitored)
3037 pgmPoolMonitorFlush(pPool, pPage);
3038 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3039 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3040 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3041 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3042 pPage->cModifications = 0;
3043#endif
3044 pPage->GCPhys = NIL_RTGCPHYS;
3045 pPage->enmKind = PGMPOOLKIND_FREE;
3046 Assert(pPage->idx == i);
3047 pPage->iNext = i + 1;
3048 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3049 pPage->fSeenNonGlobal = false;
3050 pPage->fMonitored= false;
3051 pPage->fCached = false;
3052 pPage->fReusedFlushPending = false;
3053 pPage->fCR3Mix = false;
3054#ifdef PGMPOOL_WITH_USER_TRACKING
3055 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3056#endif
3057#ifdef PGMPOOL_WITH_CACHE
3058 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3059 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3060#endif
3061 }
3062 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3063 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3064 pPool->cUsedPages = 0;
3065
3066#ifdef PGMPOOL_WITH_USER_TRACKING
3067 /*
3068 * Zap and reinitialize the user records.
3069 */
3070 pPool->cPresent = 0;
3071 pPool->iUserFreeHead = 0;
3072 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3073 const unsigned cMaxUsers = pPool->cMaxUsers;
3074 for (unsigned i = 0; i < cMaxUsers; i++)
3075 {
3076 paUsers[i].iNext = i + 1;
3077 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3078 paUsers[i].iUserTable = 0xfffe;
3079 }
3080 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3081#endif
3082
3083#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3084 /*
3085 * Clear all the GCPhys links and rebuild the phys ext free list.
3086 */
3087 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3088 pRam;
3089 pRam = pRam->CTXSUFF(pNext))
3090 {
3091 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3092 while (iPage-- > 0)
3093 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3094 }
3095
3096 pPool->iPhysExtFreeHead = 0;
3097 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3098 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3099 for (unsigned i = 0; i < cMaxPhysExts; i++)
3100 {
3101 paPhysExts[i].iNext = i + 1;
3102 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3103 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3104 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3105 }
3106 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3107#endif
3108
3109#ifdef PGMPOOL_WITH_MONITORING
3110 /*
3111 * Just zap the modified list.
3112 */
3113 pPool->cModifiedPages = 0;
3114 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3115#endif
3116
3117#ifdef PGMPOOL_WITH_CACHE
3118 /*
3119 * Clear the GCPhys hash and the age list.
3120 */
3121 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3122 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3123 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3124 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3125#endif
3126
3127 /*
3128 * Flush all the special root pages.
3129 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3130 */
3131 pgmPoolFlushAllSpecialRoots(pPool);
3132 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3133 {
3134 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3135 pPage->iNext = NIL_PGMPOOL_IDX;
3136#ifdef PGMPOOL_WITH_MONITORING
3137 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3138 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3139 pPage->cModifications = 0;
3140 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3141 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3142 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3143 if (pPage->fMonitored)
3144 {
3145 PVM pVM = pPool->CTXSUFF(pVM);
3146 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3147 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3148 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3149 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3150 pPool->pszAccessHandler);
3151 AssertFatalRCSuccess(rc);
3152# ifdef PGMPOOL_WITH_CACHE
3153 pgmPoolHashInsert(pPool, pPage);
3154# endif
3155 }
3156#endif
3157#ifdef PGMPOOL_WITH_USER_TRACKING
3158 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3159#endif
3160#ifdef PGMPOOL_WITH_CACHE
3161 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3162 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3163#endif
3164 }
3165
3166 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3167}
3168
3169
3170/**
3171 * Flushes a pool page.
3172 *
3173 * This moves the page to the free list after removing all user references to it.
3174 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3175 *
3176 * @returns VBox status code.
3177 * @retval VINF_SUCCESS on success.
3178 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3179 * @param pPool The pool.
3180 * @param HCPhys The HC physical address of the shadow page.
3181 */
3182int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3183{
3184 int rc = VINF_SUCCESS;
3185 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3186 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3187 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3188
3189 /*
3190 * Quietly reject any attempts at flushing any of the special root pages.
3191 */
3192 if (pPage->idx < PGMPOOL_IDX_FIRST)
3193 {
3194 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3195 return VINF_SUCCESS;
3196 }
3197
3198 /*
3199 * Mark the page as being in need of a ASMMemZeroPage().
3200 */
3201 pPage->fZeroed = false;
3202
3203#ifdef PGMPOOL_WITH_USER_TRACKING
3204 /*
3205 * Clear the page.
3206 */
3207 pgmPoolTrackClearPageUsers(pPool, pPage);
3208 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3209 pgmPoolTrackDeref(pPool, pPage);
3210 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3211#endif
3212
3213#ifdef PGMPOOL_WITH_CACHE
3214 /*
3215 * Flush it from the cache.
3216 */
3217 pgmPoolCacheFlushPage(pPool, pPage);
3218#endif /* PGMPOOL_WITH_CACHE */
3219
3220#ifdef PGMPOOL_WITH_MONITORING
3221 /*
3222 * Deregistering the monitoring.
3223 */
3224 if (pPage->fMonitored)
3225 rc = pgmPoolMonitorFlush(pPool, pPage);
3226#endif
3227
3228 /*
3229 * Free the page.
3230 */
3231 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3232 pPage->iNext = pPool->iFreeHead;
3233 pPool->iFreeHead = pPage->idx;
3234 pPage->enmKind = PGMPOOLKIND_FREE;
3235 pPage->GCPhys = NIL_RTGCPHYS;
3236 pPage->fReusedFlushPending = false;
3237
3238 pPool->cUsedPages--;
3239 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3240 return rc;
3241}
3242
3243
3244/**
3245 * Frees a usage of a pool page.
3246 *
3247 * The caller is responsible to updating the user table so that it no longer
3248 * references the shadow page.
3249 *
3250 * @param pPool The pool.
3251 * @param HCPhys The HC physical address of the shadow page.
3252 * @param iUser The shadow page pool index of the user table.
3253 * @param iUserTable The index into the user table (shadowed).
3254 */
3255void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3256{
3257 STAM_PROFILE_START(&pPool->StatFree, a);
3258 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3259 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3260 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3261#ifdef PGMPOOL_WITH_USER_TRACKING
3262 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3263#endif
3264#ifdef PGMPOOL_WITH_CACHE
3265 if (!pPage->fCached)
3266#endif
3267 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3268 STAM_PROFILE_STOP(&pPool->StatFree, a);
3269}
3270
3271
3272/**
3273 * Makes one or more free page free.
3274 *
3275 * @returns VBox status code.
3276 * @retval VINF_SUCCESS on success.
3277 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3278 *
3279 * @param pPool The pool.
3280 * @param iUser The user of the page.
3281 */
3282static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3283{
3284 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3285
3286 /*
3287 * If the pool isn't full grown yet, expand it.
3288 */
3289 if (pPool->cCurPages < pPool->cMaxPages)
3290 {
3291 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3292#ifdef IN_RING3
3293 int rc = PGMR3PoolGrow(pPool->pVMHC);
3294#else
3295 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3296#endif
3297 if (VBOX_FAILURE(rc))
3298 return rc;
3299 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3300 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3301 return VINF_SUCCESS;
3302 }
3303
3304#ifdef PGMPOOL_WITH_CACHE
3305 /*
3306 * Free one cached page.
3307 */
3308 return pgmPoolCacheFreeOne(pPool, iUser);
3309#else
3310 /*
3311 * Flush the pool.
3312 * If we have tracking enabled, it should be possible to come up with
3313 * a cheap replacement strategy...
3314 */
3315 pgmPoolFlushAllInt(pPool);
3316 return VERR_PGM_POOL_FLUSHED;
3317#endif
3318}
3319
3320
3321/**
3322 * Allocates a page from the pool.
3323 *
3324 * This page may actually be a cached page and not in need of any processing
3325 * on the callers part.
3326 *
3327 * @returns VBox status code.
3328 * @retval VINF_SUCCESS if a NEW page was allocated.
3329 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3330 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3331 * @param pVM The VM handle.
3332 * @param GCPhys The GC physical address of the page we're gonna shadow.
3333 * For 4MB and 2MB PD entries, it's the first address the
3334 * shadow PT is covering.
3335 * @param enmKind The kind of mapping.
3336 * @param iUser The shadow page pool index of the user table.
3337 * @param iUserTable The index into the user table (shadowed).
3338 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3339 */
3340int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3341{
3342 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3343 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3344 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3345
3346 *ppPage = NULL;
3347
3348#ifdef PGMPOOL_WITH_CACHE
3349 if (pPool->fCacheEnabled)
3350 {
3351 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3352 if (VBOX_SUCCESS(rc2))
3353 {
3354 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3355 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3356 return rc2;
3357 }
3358 }
3359#endif
3360
3361 /*
3362 * Allocate a new one.
3363 */
3364 int rc = VINF_SUCCESS;
3365 uint16_t iNew = pPool->iFreeHead;
3366 if (iNew == NIL_PGMPOOL_IDX)
3367 {
3368 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3369 if (VBOX_FAILURE(rc))
3370 {
3371 if (rc != VERR_PGM_POOL_CLEARED)
3372 {
3373 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3374 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3375 return rc;
3376 }
3377 rc = VERR_PGM_POOL_FLUSHED;
3378 }
3379 iNew = pPool->iFreeHead;
3380 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3381 }
3382
3383 /* unlink the free head */
3384 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3385 pPool->iFreeHead = pPage->iNext;
3386 pPage->iNext = NIL_PGMPOOL_IDX;
3387
3388 /*
3389 * Initialize it.
3390 */
3391 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3392 pPage->enmKind = enmKind;
3393 pPage->GCPhys = GCPhys;
3394 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3395 pPage->fMonitored = false;
3396 pPage->fCached = false;
3397 pPage->fReusedFlushPending = false;
3398 pPage->fCR3Mix = false;
3399#ifdef PGMPOOL_WITH_MONITORING
3400 pPage->cModifications = 0;
3401 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3402 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3403#endif
3404#ifdef PGMPOOL_WITH_USER_TRACKING
3405 pPage->cPresent = 0;
3406 pPage->iFirstPresent = ~0;
3407
3408 /*
3409 * Insert into the tracking and cache. If this fails, free the page.
3410 */
3411 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3412 if (VBOX_FAILURE(rc3))
3413 {
3414 if (rc3 != VERR_PGM_POOL_CLEARED)
3415 {
3416 pPool->cUsedPages--;
3417 pPage->enmKind = PGMPOOLKIND_FREE;
3418 pPage->GCPhys = NIL_RTGCPHYS;
3419 pPage->iNext = pPool->iFreeHead;
3420 pPool->iFreeHead = pPage->idx;
3421 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3422 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3423 return rc3;
3424 }
3425 rc = VERR_PGM_POOL_FLUSHED;
3426 }
3427#endif /* PGMPOOL_WITH_USER_TRACKING */
3428
3429 /*
3430 * Commit the allocation, clear the page and return.
3431 */
3432#ifdef VBOX_WITH_STATISTICS
3433 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3434 pPool->cUsedPagesHigh = pPool->cUsedPages;
3435#endif
3436
3437 if (!pPage->fZeroed)
3438 {
3439 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3440 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3441 ASMMemZeroPage(pv);
3442 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3443 }
3444
3445 *ppPage = pPage;
3446 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3447 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3448 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3449 return rc;
3450}
3451
3452
3453/**
3454 * Frees a usage of a pool page.
3455 *
3456 * @param pVM The VM handle.
3457 * @param HCPhys The HC physical address of the shadow page.
3458 * @param iUser The shadow page pool index of the user table.
3459 * @param iUserTable The index into the user table (shadowed).
3460 */
3461void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3462{
3463 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3464 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3465 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3466}
3467
3468
3469/**
3470 * Gets a in-use page in the pool by it's physical address.
3471 *
3472 * @returns Pointer to the page.
3473 * @param pVM The VM handle.
3474 * @param HCPhys The HC physical address of the shadow page.
3475 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3476 */
3477PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3478{
3479 /** @todo profile this! */
3480 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3481 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3482 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3483 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3484 return pPage;
3485}
3486
3487
3488/**
3489 * Flushes the entire cache.
3490 *
3491 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3492 * and execute this CR3 flush.
3493 *
3494 * @param pPool The pool.
3495 */
3496void pgmPoolFlushAll(PVM pVM)
3497{
3498 LogFlow(("pgmPoolFlushAll:\n"));
3499 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3500}
3501
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette