VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 5006

Last change on this file since 5006 was 4977, checked in by vboxsync, 17 years ago

Backed out most of 24659.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 119.9 KB
Line 
1/* $Id: PGMAllPool.cpp 4977 2007-09-22 00:01:15Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_GC
28# include <VBox/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vm.h>
32#include <VBox/disopcode.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37
38
39/*******************************************************************************
40* Internal Functions *
41*******************************************************************************/
42__BEGIN_DECLS
43static void pgmPoolFlushAllInt(PPGMPOOL pPool);
44#ifdef PGMPOOL_WITH_USER_TRACKING
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
48#endif
49#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
50static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
51#endif
52#ifdef PGMPOOL_WITH_CACHE
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
54#endif
55#ifdef PGMPOOL_WITH_MONITORING
56static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
57#endif
58#ifndef IN_RING3
59DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
60#endif
61__END_DECLS
62
63
64/**
65 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
66 *
67 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
68 * @param enmKind The page kind.
69 */
70DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
71{
72 switch (enmKind)
73 {
74 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
75 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
76 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
77 return true;
78 default:
79 return false;
80 }
81}
82
83
#ifdef IN_GC
/**
 * Maps a pool page into the current context.
 *
 * Pages with an index below PGMPOOL_IDX_FIRST are the special root pages and
 * are resolved to the pointers kept in the PGM instance data. All other pages
 * are mapped through PGMGCDynMapHCPage using the page's Core.Key.
 *
 * @returns Pointer to the mapping; NULL (after a release assertion) when the
 *          special page index is not recognised.
 * @param   pVM         The VM handle.
 * @param   pPage       The page to map.
 */
void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* Special pages first. */
    if (pPage->idx < PGMPOOL_IDX_FIRST)
    {
        switch (pPage->idx)
        {
            case PGMPOOL_IDX_PD:
                return pVM->pgm.s.pGC32BitPD;
            case PGMPOOL_IDX_PAE_PD:
                return pVM->pgm.s.apGCPaePDs[0];
            case PGMPOOL_IDX_PDPTR:
                return pVM->pgm.s.pGCPaePDPTR;
            case PGMPOOL_IDX_PML4:
                return pVM->pgm.s.pGCPaePML4;
            default:
                AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
                return NULL;
        }
    }

    /* General pages. */
    Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
    void *pvMapping;
    int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pvMapping);
    AssertReleaseRC(rc);
    return pvMapping;
}
#endif /* IN_GC */
121
122
123#ifdef PGMPOOL_WITH_MONITORING
124/**
125 * Determin the size of a write instruction.
126 * @returns number of bytes written.
127 * @param pDis The disassembler state.
128 */
129static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
130{
131 /*
132 * This is very crude and possibly wrong for some opcodes,
133 * but since it's not really supposed to be called we can
134 * probably live with that.
135 */
136 return DISGetParamSize(pDis, &pDis->param1);
137}
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 /*
150 * Find the list head.
151 */
152 uint16_t idx = pPage->idx;
153 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
154 {
155 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 idx = pPage->iMonitoredPrev;
158 Assert(idx != pPage->idx);
159 pPage = &pPool->aPages[idx];
160 }
161 }
162
163 /*
164 * Itereate the list flushing each shadow page.
165 */
166 int rc = VINF_SUCCESS;
167 for (;;)
168 {
169 idx = pPage->iMonitoredNext;
170 Assert(idx != pPage->idx);
171 if (pPage->idx >= PGMPOOL_IDX_FIRST)
172 {
173 int rc2 = pgmPoolFlushPage(pPool, pPage);
174 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
175 rc = VINF_PGM_SYNC_CR3;
176 }
177 /* next */
178 if (idx == NIL_PGMPOOL_IDX)
179 break;
180 pPage = &pPool->aPages[idx];
181 }
182 return rc;
183}
184
185
186/**
187 * Wrapper for getting the current context pointer to the entry being modified.
188 *
189 * @returns Pointer to the current context mapping of the entry.
190 * @param pPool The pool.
191 * @param pvFault The fault virtual address.
192 * @param GCPhysFault The fault physical address.
193 * @param cbEntry The entry size.
194 */
195#ifdef IN_RING3
196DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
197#else
198DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
199#endif
200{
201#ifdef IN_GC
202 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
203
204#elif defined(IN_RING0)
205 void *pvRet;
206 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
207 AssertFatalRCSuccess(rc);
208 return pvRet;
209
210#elif defined(IN_RING3)
211 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
212#else
213# error "huh?"
214#endif
215}
216
217
218/**
219 * Process shadow entries before they are changed by the guest.
220 *
221 * For PT entries we will clear them. For PD entries, we'll simply check
222 * for mapping conflicts and set the SyncCR3 FF if found.
223 *
224 * @param pPool The pool.
225 * @param pPage The head page.
226 * @param GCPhysFault The guest physical fault address.
227 * @param uAddress In R0 and GC this is the guest context fault address (flat).
228 * In R3 this is the host context 'fault' address.
229 * @param pCpu The disassembler state for figuring out the write size.
230 * This need not be specified if the caller knows we won't do cross entry accesses.
231 */
232#ifdef IN_RING3
233void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
234#else
235void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
236#endif
237{
238 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
239 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
240 for (;;)
241 {
242 union
243 {
244 void *pv;
245 PX86PT pPT;
246 PX86PTPAE pPTPae;
247 PX86PD pPD;
248 PX86PDPAE pPDPae;
249 } uShw;
250 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
251
252 switch (pPage->enmKind)
253 {
254 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
255 {
256 const unsigned iShw = off / sizeof(X86PTE);
257 if (uShw.pPT->a[iShw].n.u1Present)
258 {
259# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
260 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
261 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
262 pgmPoolTracDerefGCPhysHint(pPool, pPage,
263 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
264 pGstPte->u & X86_PTE_PG_MASK);
265# endif
266 uShw.pPT->a[iShw].u = 0;
267 }
268 break;
269 }
270
271 /* page/2 sized */
272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
273 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
274 {
275 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
279 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
280 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
281 pgmPoolTracDerefGCPhysHint(pPool, pPage,
282 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
283 pGstPte->u & X86_PTE_PG_MASK);
284# endif
285 uShw.pPTPae->a[iShw].u = 0;
286 }
287 }
288 break;
289
290 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
291 {
292 const unsigned iShw = off / sizeof(X86PTPAE);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PAE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 break;
305 }
306
307 case PGMPOOLKIND_ROOT_32BIT_PD:
308 {
309 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
310 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
311 {
312 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
313 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
315 }
316 /* paranoia / a bit assumptive. */
317 else if ( pCpu
318 && (off & 4)
319 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
320 {
321 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
322 if ( iShw2 != iShw
323 && iShw2 < ELEMENTS(uShw.pPD->a)
324 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
325 {
326 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
327 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
328 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
329 }
330 }
331#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
332 if ( uShw.pPD->a[iShw].n.u1Present
333 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
334 {
335 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
336# ifdef IN_GC /* TLB load - we're pushing things a bit... */
337 ASMProbeReadByte(pvAddress);
338# endif
339 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
340 uShw.pPD->a[iShw].u = 0;
341 }
342#endif
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_PAE_PD:
347 {
348 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
349 for (unsigned i = 0; i < 2; i++, iShw++)
350 {
351 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
352 {
353 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
354 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
355 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
356 }
357 /* paranoia / a bit assumptive. */
358 else if ( pCpu
359 && (off & 4)
360 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
361 {
362 const unsigned iShw2 = iShw + 2;
363 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
364 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
367 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
369 }
370 }
371#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
372 if ( uShw.pPDPae->a[iShw].n.u1Present
373 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
376# ifdef IN_GC /* TLB load - we're pushing things a bit... */
377 ASMProbeReadByte(pvAddress);
378# endif
379 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
380 uShw.pPDPae->a[iShw].u = 0;
381 }
382#endif
383 }
384 break;
385 }
386
387 default:
388 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
389 }
390
391 /* next */
392 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
393 return;
394 pPage = &pPool->aPages[pPage->iMonitoredNext];
395 }
396}
397
398
399# ifndef IN_RING3
400/**
401 * Checks if a access could be a fork operation in progress.
402 *
403 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
404 *
405 * @returns true if it's likly that we're forking, otherwise false.
406 * @param pPool The pool.
407 * @param pCpu The disassembled instruction.
408 * @param offFault The access offset.
409 */
410DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
411{
412 /*
413 * i386 linux is using btr to clear X86_PTE_RW.
414 * The functions involved are (2.6.16 source inspection):
415 * clear_bit
416 * ptep_set_wrprotect
417 * copy_one_pte
418 * copy_pte_range
419 * copy_pmd_range
420 * copy_pud_range
421 * copy_page_range
422 * dup_mmap
423 * dup_mm
424 * copy_mm
425 * copy_process
426 * do_fork
427 */
428 if ( pCpu->pCurInstr->opcode == OP_BTR
429 && !(offFault & 4)
430 /** @todo Validate that the bit index is X86_PTE_RW. */
431 )
432 {
433 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
434 return true;
435 }
436 return false;
437}
438
439
440/**
441 * Determin whether the page is likely to have been reused.
442 *
443 * @returns true if we consider the page as being reused for a different purpose.
444 * @returns false if we consider it to still be a paging page.
445 * @param pPage The page in question.
446 * @param pCpu The disassembly info for the faulting insturction.
447 * @param pvFault The fault address.
448 *
449 * @remark The REP prefix check is left to the caller because of STOSD/W.
450 */
451DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
452{
453 switch (pCpu->pCurInstr->opcode)
454 {
455 case OP_PUSH:
456 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
457 return true;
458 case OP_PUSHF:
459 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
460 return true;
461 case OP_PUSHA:
462 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
463 return true;
464 case OP_FXSAVE:
465 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
466 return true;
467 }
468 if ( (pCpu->param1.flags & USE_REG_GEN32)
469 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
470 {
471 Log4(("pgmPoolMonitorIsReused: ESP\n"));
472 return true;
473 }
474
475 //if (pPage->fCR3Mix)
476 // return false;
477 return false;
478}
479
480
481/**
482 * Flushes the page being accessed.
483 *
484 * @returns VBox status code suitable for scheduling.
485 * @param pVM The VM handle.
486 * @param pPool The pool.
487 * @param pPage The pool page (head).
488 * @param pCpu The disassembly of the write instruction.
489 * @param pRegFrame The trap register frame.
490 * @param GCPhysFault The fault address as guest physical address.
491 * @param pvFault The fault address.
492 */
493static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
494 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
495{
496 /*
497 * First, do the flushing.
498 */
499 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
500
501 /*
502 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
503 */
504 uint32_t cbWritten;
505 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
506 if (VBOX_SUCCESS(rc2))
507 pRegFrame->eip += pCpu->opsize;
508 else if (rc2 == VERR_EM_INTERPRETER)
509 {
510#ifdef IN_GC
511 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
512 {
513 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
514 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
515 rc = VINF_SUCCESS;
516 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
517 }
518 else
519#endif
520 {
521 rc = VINF_EM_RAW_EMULATE_INSTR;
522 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
523 }
524 }
525 else
526 rc = rc2;
527
528 /* See use in pgmPoolAccessHandlerSimple(). */
529 PGM_INVL_GUEST_TLBS();
530
531 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
532 return rc;
533
534}
535
536
537/**
538 * Handles the STOSD write accesses.
539 *
540 * @returns VBox status code suitable for scheduling.
541 * @param pVM The VM handle.
542 * @param pPool The pool.
543 * @param pPage The pool page (head).
544 * @param pCpu The disassembly of the write instruction.
545 * @param pRegFrame The trap register frame.
546 * @param GCPhysFault The fault address as guest physical address.
547 * @param pvFault The fault address.
548 */
549DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
550 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
551{
552 /*
553 * Increment the modification counter and insert it into the list
554 * of modified pages the first time.
555 */
556 if (!pPage->cModifications++)
557 pgmPoolMonitorModifiedInsert(pPool, pPage);
558
559 /*
560 * Execute REP STOSD.
561 *
562 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
563 * write situation, meaning that it's safe to write here.
564 */
565#ifdef IN_GC
566 uint32_t *pu32 = (uint32_t *)pvFault;
567#else
568 RTGCPTR pu32 = pvFault;
569#endif
570 while (pRegFrame->ecx)
571 {
572 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
573#ifdef IN_GC
574 *pu32++ = pRegFrame->eax;
575#else
576 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
577 pu32 += 4;
578#endif
579 GCPhysFault += 4;
580 pRegFrame->edi += 4;
581 pRegFrame->ecx--;
582 }
583 pRegFrame->eip += pCpu->opsize;
584
585 /* See use in pgmPoolAccessHandlerSimple(). */
586 PGM_INVL_GUEST_TLBS();
587
588 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
589 return VINF_SUCCESS;
590}
591
592
593/**
594 * Handles the simple write accesses.
595 *
596 * @returns VBox status code suitable for scheduling.
597 * @param pVM The VM handle.
598 * @param pPool The pool.
599 * @param pPage The pool page (head).
600 * @param pCpu The disassembly of the write instruction.
601 * @param pRegFrame The trap register frame.
602 * @param GCPhysFault The fault address as guest physical address.
603 * @param pvFault The fault address.
604 */
605DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
606 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
607{
608 /*
609 * Increment the modification counter and insert it into the list
610 * of modified pages the first time.
611 */
612 if (!pPage->cModifications++)
613 pgmPoolMonitorModifiedInsert(pPool, pPage);
614
615 /*
616 * Clear all the pages. ASSUMES that pvFault is readable.
617 */
618 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
619
620 /*
621 * Interpret the instruction.
622 */
623 uint32_t cb;
624 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
625 if (VBOX_SUCCESS(rc))
626 pRegFrame->eip += pCpu->opsize;
627 else if (rc == VERR_EM_INTERPRETER)
628 {
629# ifdef IN_GC
630 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
631 {
632 /* We're not able to handle this in ring-3, so fix the interpreter! */
633 /** @note Should be fine. There's no need to flush the whole thing. */
634#ifndef DEBUG_sandervl
635 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
636 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
637#endif
638 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
639 rc = pgmPoolMonitorChainFlush(pPool, pPage);
640 }
641 else
642# endif
643 {
644 rc = VINF_EM_RAW_EMULATE_INSTR;
645 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
646 }
647 }
648
649 /*
650 * Quick hack, with logging enabled we're getting stale
651 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
652 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
653 * have to be fixed to support this. But that'll have to wait till next week.
654 *
655 * An alternative is to keep track of the changed PTEs together with the
656 * GCPhys from the guest PT. This may proove expensive though.
657 *
658 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
659 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
660 */
661 PGM_INVL_GUEST_TLBS();
662
663 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
664 return rc;
665}
666
667
668/**
669 * \#PF Handler callback for PT write accesses.
670 *
671 * @returns VBox status code (appropriate for GC return).
672 * @param pVM VM Handle.
673 * @param uErrorCode CPU Error code.
674 * @param pRegFrame Trap register frame.
675 * NULL on DMA and other non CPU access.
676 * @param pvFault The fault address (cr2).
677 * @param GCPhysFault The GC physical address corresponding to pvFault.
678 * @param pvUser User argument.
679 */
680DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
681{
682 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
683 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
684 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
685 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
686
687 /*
688 * We should ALWAYS have the list head as user parameter. This
689 * is because we use that page to record the changes.
690 */
691 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
692
693 /*
694 * Disassemble the faulting instruction.
695 */
696 DISCPUSTATE Cpu;
697 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
698 AssertRCReturn(rc, rc);
699
700 /*
701 * Check if it's worth dealing with.
702 */
703 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
704 || pPage->fCR3Mix)
705 && !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
706 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
707 {
708 /*
709 * Simple instructions, no REP prefix.
710 */
711 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
712 {
713 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
714 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
715 return rc;
716 }
717
718 /*
719 * Windows is frequently doing small memset() operations (netio test 4k+).
720 * We have to deal with these or we'll kill the cache and performance.
721 */
722 if ( Cpu.pCurInstr->opcode == OP_STOSWD
723 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
724 && pRegFrame->ecx <= 0x20
725 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
726 && !((uintptr_t)pvFault & 3)
727 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
728 && Cpu.mode == CPUMODE_32BIT
729 && Cpu.opmode == CPUMODE_32BIT
730 && Cpu.addrmode == CPUMODE_32BIT
731 && Cpu.prefix == PREFIX_REP
732 && !pRegFrame->eflags.Bits.u1DF
733 )
734 {
735 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
736 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
737 return rc;
738 }
739
740 /* REP prefix, don't bother. */
741 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
742 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
743 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
744 }
745
746 /*
747 * Not worth it, so flush it.
748 */
749 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
750 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
751 return rc;
752}
753
754# endif /* !IN_RING3 */
755#endif /* PGMPOOL_WITH_MONITORING */
756
757
758
759#ifdef PGMPOOL_WITH_CACHE
760/**
761 * Inserts a page into the GCPhys hash table.
762 *
763 * @param pPool The pool.
764 * @param pPage The page.
765 */
766DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
767{
768 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
769 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
770 pPage->iNext = pPool->aiHash[iHash];
771 pPool->aiHash[iHash] = pPage->idx;
772}
773
774
775/**
776 * Removes a page from the GCPhys hash table.
777 *
778 * @param pPool The pool.
779 * @param pPage The page.
780 */
781DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
782{
783 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
784 if (pPool->aiHash[iHash] == pPage->idx)
785 pPool->aiHash[iHash] = pPage->iNext;
786 else
787 {
788 uint16_t iPrev = pPool->aiHash[iHash];
789 for (;;)
790 {
791 const int16_t i = pPool->aPages[iPrev].iNext;
792 if (i == pPage->idx)
793 {
794 pPool->aPages[iPrev].iNext = pPage->iNext;
795 break;
796 }
797 if (i == NIL_PGMPOOL_IDX)
798 {
799 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
800 break;
801 }
802 iPrev = i;
803 }
804 }
805 pPage->iNext = NIL_PGMPOOL_IDX;
806}
807
808
809/**
810 * Frees up one cache page.
811 *
812 * @returns VBox status code.
813 * @retval VINF_SUCCESS on success.
814 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
815 * @param pPool The pool.
816 * @param iUser The user index.
817 */
818static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
819{
820 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
821 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
822
823 /*
824 * Select one page from the tail of the age list.
825 */
826 uint16_t iToFree = pPool->iAgeTail;
827 if (iToFree == iUser)
828 iToFree = pPool->aPages[iToFree].iAgePrev;
829/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
830 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
831 {
832 uint16_t i = pPool->aPages[iToFree].iAgePrev;
833 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
834 {
835 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
836 continue;
837 iToFree = i;
838 break;
839 }
840 }
841*/
842 Assert(iToFree != iUser);
843 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
844
845 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
846 if (rc == VINF_SUCCESS)
847 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
848 return rc;
849}
850
851
852/**
853 * Checks if a kind mismatch is really a page being reused
854 * or if it's just normal remappings.
855 *
856 * @returns true if reused and the cached page (enmKind1) should be flushed
857 * @returns false if not reused.
858 * @param enmKind1 The kind of the cached page.
859 * @param enmKind2 The kind of the requested page.
860 */
861static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
862{
863 switch (enmKind1)
864 {
865 /*
866 * Never reuse them. There is no remapping in non-paging mode.
867 */
868 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
869 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
870 return true;
871
872 /*
873 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
874 */
875 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
876 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
877 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
878 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
879 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
880 switch (enmKind2)
881 {
882 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
883 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
884 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
885 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
886 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
887 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
888 return true;
889 default:
890 return false;
891 }
892
893 /*
894 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
895 */
896 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
897 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
898 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
899 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
900 switch (enmKind2)
901 {
902 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
904 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
905 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
906 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
907 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
908 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
909 return true;
910 default:
911 return false;
912 }
913
914 /*
915 * These cannot be flushed, and it's common to reuse the PDs as PTs.
916 */
917 case PGMPOOLKIND_ROOT_32BIT_PD:
918 case PGMPOOLKIND_ROOT_PAE_PD:
919 case PGMPOOLKIND_ROOT_PDPTR:
920 case PGMPOOLKIND_ROOT_PML4:
921 return false;
922
923 default:
924 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
925 }
926}
927
928
929/**
930 * Attempts to satisfy a pgmPoolAlloc request from the cache.
931 *
932 * @returns VBox status code.
933 * @retval VINF_PGM_CACHED_PAGE on success.
934 * @retval VERR_FILE_NOT_FOUND if not found.
935 * @param pPool The pool.
936 * @param GCPhys The GC physical address of the page we're gonna shadow.
937 * @param enmKind The kind of mapping.
938 * @param iUser The shadow page pool index of the user table.
939 * @param iUserTable The index into the user table (shadowed).
940 * @param ppPage Where to store the pointer to the page.
941 */
942static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
943{
944 /*
945 * Look up the GCPhys in the hash.
946 */
947 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
948 if (i != NIL_PGMPOOL_IDX)
949 {
950 do
951 {
952 PPGMPOOLPAGE pPage = &pPool->aPages[i];
953 if (pPage->GCPhys == GCPhys)
954 {
955 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
956 {
957 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
958 if (VBOX_SUCCESS(rc))
959 {
960 *ppPage = pPage;
961 STAM_COUNTER_INC(&pPool->StatCacheHits);
962 return VINF_PGM_CACHED_PAGE;
963 }
964 return rc;
965 }
966
967 /*
968 * The kind is different. In some cases we should now flush the page
969 * as it has been reused, but in most cases this is normal remapping
970 * of PDs as PT or big pages using the GCPhys field in a slightly
971 * different way than the other kinds.
972 */
973 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
974 {
975 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
976 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
977 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
978 break;
979 }
980 }
981
982 /* next */
983 i = pPage->iNext;
984 } while (i != NIL_PGMPOOL_IDX);
985 }
986
987 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
988 STAM_COUNTER_INC(&pPool->StatCacheMisses);
989 return VERR_FILE_NOT_FOUND;
990}
991
992
993/**
994 * Inserts a page into the cache.
995 *
996 * @param pPool The pool.
997 * @param pPage The cached page.
998 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
999 */
1000static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1001{
1002 /*
1003 * Insert into the GCPhys hash if the page is fit for that.
1004 */
1005 Assert(!pPage->fCached);
1006 if (fCanBeCached)
1007 {
1008 pPage->fCached = true;
1009 pgmPoolHashInsert(pPool, pPage);
1010 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1011 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1012 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1013 }
1014 else
1015 {
1016 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1017 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1018 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1019 }
1020
1021 /*
1022 * Insert at the head of the age list.
1023 */
1024 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1025 pPage->iAgeNext = pPool->iAgeHead;
1026 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1027 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1028 else
1029 pPool->iAgeTail = pPage->idx;
1030 pPool->iAgeHead = pPage->idx;
1031}
1032
1033
1034/**
1035 * Flushes a cached page.
1036 *
1037 * @param pPool The pool.
1038 * @param pPage The cached page.
1039 */
1040static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1041{
1042 /*
1043 * Remove the page from the hash.
1044 */
1045 if (pPage->fCached)
1046 {
1047 pPage->fCached = false;
1048 pgmPoolHashRemove(pPool, pPage);
1049 }
1050 else
1051 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1052
1053 /*
1054 * Remove it from the age list.
1055 */
1056 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1057 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1058 else
1059 pPool->iAgeTail = pPage->iAgePrev;
1060 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1061 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1062 else
1063 pPool->iAgeHead = pPage->iAgeNext;
1064 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1065 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1066}
1067#endif /* PGMPOOL_WITH_CACHE */
1068
1069
1070#ifdef PGMPOOL_WITH_MONITORING
1071/**
1072 * Looks for pages sharing the monitor.
1073 *
1074 * @returns Pointer to the head page.
1075 * @returns NULL if not found.
1076 * @param pPool The Pool
1077 * @param pNewPage The page which is going to be monitored.
1078 */
1079static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1080{
1081#ifdef PGMPOOL_WITH_CACHE
1082 /*
1083 * Look up the GCPhys in the hash.
1084 */
1085 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1086 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1087 if (i == NIL_PGMPOOL_IDX)
1088 return NULL;
1089 do
1090 {
1091 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1092 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1093 && pPage != pNewPage)
1094 {
1095 switch (pPage->enmKind)
1096 {
1097 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1098 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1099 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1100 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1101 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1102 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1103 case PGMPOOLKIND_ROOT_32BIT_PD:
1104 case PGMPOOLKIND_ROOT_PAE_PD:
1105 case PGMPOOLKIND_ROOT_PDPTR:
1106 case PGMPOOLKIND_ROOT_PML4:
1107 {
1108 /* find the head */
1109 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1110 {
1111 Assert(pPage->iMonitoredPrev != pPage->idx);
1112 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1113 }
1114 return pPage;
1115 }
1116
1117 /* ignore, no monitoring. */
1118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1119 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1121 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1122 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1123 break;
1124 default:
1125 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1126 }
1127 }
1128
1129 /* next */
1130 i = pPage->iNext;
1131 } while (i != NIL_PGMPOOL_IDX);
1132#endif
1133 return NULL;
1134}
1135
1136/**
1137 * Enabled write monitoring of a guest page.
1138 *
1139 * @returns VBox status code.
1140 * @retval VINF_SUCCESS on success.
1141 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1142 * @param pPool The pool.
1143 * @param pPage The cached page.
1144 */
1145static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1146{
1147 /*
1148 * Filter out the relevant kinds.
1149 */
1150 switch (pPage->enmKind)
1151 {
1152 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1153 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1154 break;
1155
1156 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1157 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1158 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1159 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1160 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1161 /* Nothing to monitor here. */
1162 return VINF_SUCCESS;
1163
1164 case PGMPOOLKIND_ROOT_32BIT_PD:
1165 case PGMPOOLKIND_ROOT_PAE_PD:
1166#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1167 break;
1168#endif
1169 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1170 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1171 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1172 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1173 case PGMPOOLKIND_ROOT_PDPTR:
1174 case PGMPOOLKIND_ROOT_PML4:
1175 default:
1176 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1177 }
1178
1179 /*
1180 * Install handler.
1181 */
1182 int rc;
1183 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1184 if (pPageHead)
1185 {
1186 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1187 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1188 pPage->iMonitoredPrev = pPageHead->idx;
1189 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1190 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1191 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1192 pPageHead->iMonitoredNext = pPage->idx;
1193 rc = VINF_SUCCESS;
1194 }
1195 else
1196 {
1197 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1198 PVM pVM = pPool->CTXSUFF(pVM);
1199 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1200 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1201 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1202 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1203 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1204 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
1205 pPool->pszAccessHandler);
1206 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1207 * the heap size should suffice. */
1208 AssertFatalRC(rc);
1209 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1210 rc = VERR_PGM_POOL_CLEARED;
1211 }
1212 pPage->fMonitored = true;
1213 return rc;
1214}
1215
1216
1217/**
1218 * Disables write monitoring of a guest page.
1219 *
1220 * @returns VBox status code.
1221 * @retval VINF_SUCCESS on success.
1222 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1223 * @param pPool The pool.
1224 * @param pPage The cached page.
1225 */
1226static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1227{
1228 /*
1229 * Filter out the relevant kinds.
1230 */
1231 switch (pPage->enmKind)
1232 {
1233 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1234 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1235 break;
1236
1237 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1238 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1239 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1240 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1241 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1242 /* Nothing to monitor here. */
1243 return VINF_SUCCESS;
1244
1245 case PGMPOOLKIND_ROOT_32BIT_PD:
1246 case PGMPOOLKIND_ROOT_PAE_PD:
1247#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1248 break;
1249#endif
1250 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1251 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1252 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1253 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1254 case PGMPOOLKIND_ROOT_PDPTR:
1255 case PGMPOOLKIND_ROOT_PML4:
1256 default:
1257 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1258 }
1259
1260 /*
1261 * Remove the page from the monitored list or uninstall it if last.
1262 */
1263 const PVM pVM = pPool->CTXSUFF(pVM);
1264 int rc;
1265 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1266 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1267 {
1268 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1269 {
1270 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1271 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1272 pNewHead->fCR3Mix = pPage->fCR3Mix;
1273 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1274 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1275 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1276 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
1277 pPool->pszAccessHandler);
1278 AssertFatalRCSuccess(rc);
1279 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1280 }
1281 else
1282 {
1283 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1284 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1285 {
1286 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1287 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1288 }
1289 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1290 rc = VINF_SUCCESS;
1291 }
1292 }
1293 else
1294 {
1295 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1296 AssertFatalRC(rc);
1297 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1298 rc = VERR_PGM_POOL_CLEARED;
1299 }
1300 pPage->fMonitored = false;
1301
1302 /*
1303 * Remove it from the list of modified pages (if in it).
1304 */
1305 pgmPoolMonitorModifiedRemove(pPool, pPage);
1306
1307 return rc;
1308}
1309
1310
1311#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1312/**
1313 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1314 *
1315 * @param pPool The Pool.
1316 * @param pPage A page in the chain.
1317 * @param fCR3Mix The new fCR3Mix value.
1318 */
1319static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1320{
1321 /* current */
1322 pPage->fCR3Mix = fCR3Mix;
1323
1324 /* before */
1325 int16_t idx = pPage->iMonitoredPrev;
1326 while (idx != NIL_PGMPOOL_IDX)
1327 {
1328 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1329 idx = pPool->aPages[idx].iMonitoredPrev;
1330 }
1331
1332 /* after */
1333 idx = pPage->iMonitoredNext;
1334 while (idx != NIL_PGMPOOL_IDX)
1335 {
1336 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1337 idx = pPool->aPages[idx].iMonitoredNext;
1338 }
1339}
1340
1341
1342/**
1343 * Installs or modifies monitoring of a CR3 page (special).
1344 *
1345 * We're pretending the CR3 page is shadowed by the pool so we can use the
1346 * generic mechanisms in detecting chained monitoring. (This also gives us a
1347 * tast of what code changes are required to really pool CR3 shadow pages.)
1348 *
1349 * @returns VBox status code.
1350 * @param pPool The pool.
1351 * @param idxRoot The CR3 (root) page index.
1352 * @param GCPhysCR3 The (new) CR3 value.
1353 */
1354int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1355{
1356 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1357 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1358 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1359 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1360
1361 /*
1362 * The unlikely case where it already matches.
1363 */
1364 if (pPage->GCPhys == GCPhysCR3)
1365 {
1366 Assert(pPage->fMonitored);
1367 return VINF_SUCCESS;
1368 }
1369
1370 /*
1371 * Flush the current monitoring and remove it from the hash.
1372 */
1373 int rc = VINF_SUCCESS;
1374 if (pPage->fMonitored)
1375 {
1376 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1377 rc = pgmPoolMonitorFlush(pPool, pPage);
1378 if (rc == VERR_PGM_POOL_CLEARED)
1379 rc = VINF_SUCCESS;
1380 else
1381 AssertFatalRC(rc);
1382 pgmPoolHashRemove(pPool, pPage);
1383 }
1384
1385 /*
1386 * Monitor the page at the new location and insert it into the hash.
1387 */
1388 pPage->GCPhys = GCPhysCR3;
1389 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1390 if (rc2 != VERR_PGM_POOL_CLEARED)
1391 {
1392 AssertFatalRC(rc2);
1393 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1394 rc = rc2;
1395 }
1396 pgmPoolHashInsert(pPool, pPage);
1397 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1398 return rc;
1399}
1400
1401
1402/**
1403 * Removes the monitoring of a CR3 page (special).
1404 *
1405 * @returns VBox status code.
1406 * @param pPool The pool.
1407 * @param idxRoot The CR3 (root) page index.
1408 */
1409int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1410{
1411 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1412 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1413 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1414 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1415
1416 if (!pPage->fMonitored)
1417 return VINF_SUCCESS;
1418
1419 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1420 int rc = pgmPoolMonitorFlush(pPool, pPage);
1421 if (rc != VERR_PGM_POOL_CLEARED)
1422 AssertFatalRC(rc);
1423 else
1424 rc = VINF_SUCCESS;
1425 pgmPoolHashRemove(pPool, pPage);
1426 Assert(!pPage->fMonitored);
1427 pPage->GCPhys = NIL_RTGCPHYS;
1428 return rc;
1429}
1430#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1431
1432
1433/**
1434 * Inserts the page into the list of modified pages.
1435 *
1436 * @param pPool The pool.
1437 * @param pPage The page.
1438 */
1439void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1440{
1441 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1442 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1443 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1444 && pPool->iModifiedHead != pPage->idx,
1445 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1446 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1447 pPool->iModifiedHead, pPool->cModifiedPages));
1448
1449 pPage->iModifiedNext = pPool->iModifiedHead;
1450 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1451 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1452 pPool->iModifiedHead = pPage->idx;
1453 pPool->cModifiedPages++;
1454#ifdef VBOX_WITH_STATISTICS
1455 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1456 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1457#endif
1458}
1459
1460
1461/**
1462 * Removes the page from the list of modified pages and resets the
1463 * moficiation counter.
1464 *
1465 * @param pPool The pool.
1466 * @param pPage The page which is believed to be in the list of modified pages.
1467 */
1468static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1469{
1470 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1471 if (pPool->iModifiedHead == pPage->idx)
1472 {
1473 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1474 pPool->iModifiedHead = pPage->iModifiedNext;
1475 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1476 {
1477 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1478 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1479 }
1480 pPool->cModifiedPages--;
1481 }
1482 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1483 {
1484 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1485 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1486 {
1487 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1488 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1489 }
1490 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1491 pPool->cModifiedPages--;
1492 }
1493 else
1494 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1495 pPage->cModifications = 0;
1496}
1497
1498
1499/**
1500 * Zaps the list of modified pages, resetting their modification counters in the process.
1501 *
1502 * @param pVM The VM handle.
1503 */
1504void pgmPoolMonitorModifiedClearAll(PVM pVM)
1505{
1506 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1507 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1508
1509 unsigned cPages = 0; NOREF(cPages);
1510 uint16_t idx = pPool->iModifiedHead;
1511 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1512 while (idx != NIL_PGMPOOL_IDX)
1513 {
1514 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1515 idx = pPage->iModifiedNext;
1516 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1517 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1518 pPage->cModifications = 0;
1519 Assert(++cPages);
1520 }
1521 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1522 pPool->cModifiedPages = 0;
1523}
1524
1525
1526/**
1527 * Clear all shadow pages and clear all modification counters.
1528 *
1529 * @param pVM The VM handle.
1530 * @remark Should only be used when monitoring is available, thus placed in
1531 * the PGMPOOL_WITH_MONITORING #ifdef.
1532 */
1533void pgmPoolClearAll(PVM pVM)
1534{
1535 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1536 STAM_PROFILE_START(&pPool->StatClearAll, c);
1537 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1538
1539 /*
1540 * Iterate all the pages until we've encountered all that in use.
1541 * This is simple but not quite optimal solution.
1542 */
1543 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1544 unsigned cLeft = pPool->cUsedPages;
1545 unsigned iPage = pPool->cCurPages;
1546 while (--iPage >= PGMPOOL_IDX_FIRST)
1547 {
1548 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1549 if (pPage->GCPhys != NIL_RTGCPHYS)
1550 {
1551 switch (pPage->enmKind)
1552 {
1553 /*
1554 * We only care about shadow page tables.
1555 */
1556 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1558 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1560 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1561 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1562 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1563 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1564 {
1565#ifdef PGMPOOL_WITH_USER_TRACKING
1566 if (pPage->cPresent)
1567#endif
1568 {
1569 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1570 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1571 ASMMemZeroPage(pvShw);
1572 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1573#ifdef PGMPOOL_WITH_USER_TRACKING
1574 pPage->cPresent = 0;
1575 pPage->iFirstPresent = ~0;
1576#endif
1577 }
1578 }
1579 /* fall thru */
1580
1581 default:
1582 Assert(!pPage->cModifications || ++cModifiedPages);
1583 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1584 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1585 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1586 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1587 pPage->cModifications = 0;
1588 break;
1589
1590 }
1591 if (!--cLeft)
1592 break;
1593 }
1594 }
1595
1596 /* swipe the special pages too. */
1597 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1598 {
1599 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1600 if (pPage->GCPhys != NIL_RTGCPHYS)
1601 {
1602 Assert(!pPage->cModifications || ++cModifiedPages);
1603 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1604 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1605 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1606 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1607 pPage->cModifications = 0;
1608 }
1609 }
1610
1611 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1612 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1613 pPool->cModifiedPages = 0;
1614
1615#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1616 /*
1617 * Clear all the GCPhys links and rebuild the phys ext free list.
1618 */
1619 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1620 pRam;
1621 pRam = pRam->CTXSUFF(pNext))
1622 {
1623 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1624 while (iPage-- > 0)
1625 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1626 }
1627
1628 pPool->iPhysExtFreeHead = 0;
1629 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1630 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1631 for (unsigned i = 0; i < cMaxPhysExts; i++)
1632 {
1633 paPhysExts[i].iNext = i + 1;
1634 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1635 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1636 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1637 }
1638 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1639#endif
1640
1641
1642 pPool->cPresent = 0;
1643 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1644}
1645#endif /* PGMPOOL_WITH_MONITORING */
1646
1647
1648#ifdef PGMPOOL_WITH_USER_TRACKING
1649/**
1650 * Frees up at least one user entry.
1651 *
1652 * @returns VBox status code.
1653 * @retval VINF_SUCCESS if successfully added.
1654 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1655 * @param pPool The pool.
1656 * @param iUser The user index.
1657 */
1658static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1659{
1660 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1661#ifdef PGMPOOL_WITH_CACHE
1662 /*
1663 * Just free cached pages in a braindead fashion.
1664 */
1665 /** @todo walk the age list backwards and free the first with usage. */
1666 int rc = VINF_SUCCESS;
1667 do
1668 {
1669 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1670 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1671 rc = rc2;
1672 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1673 return rc;
1674#else
1675 /*
1676 * Lazy approach.
1677 */
1678 pgmPoolFlushAllInt(pPool);
1679 return VERR_PGM_POOL_FLUSHED;
1680#endif
1681}
1682
1683
1684/**
1685 * Inserts a page into the cache.
1686 *
1687 * This will create user node for the page, insert it into the GCPhys
1688 * hash, and insert it into the age list.
1689 *
1690 * @returns VBox status code.
1691 * @retval VINF_SUCCESS if successfully added.
1692 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1693 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1694 * @param pPool The pool.
1695 * @param pPage The cached page.
1696 * @param GCPhys The GC physical address of the page we're gonna shadow.
1697 * @param iUser The user index.
1698 * @param iUserTable The user table index.
1699 */
1700DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1701{
1702 int rc = VINF_SUCCESS;
1703 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1704
1705 /*
1706 * Find free a user node.
1707 */
1708 uint16_t i = pPool->iUserFreeHead;
1709 if (i == NIL_PGMPOOL_USER_INDEX)
1710 {
1711 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1712 if (VBOX_FAILURE(rc))
1713 return rc;
1714 i = pPool->iUserFreeHead;
1715 }
1716
1717 /*
1718 * Unlink the user node from the free list,
1719 * initialize and insert it into the user list.
1720 */
1721 pPool->iUserFreeHead = pUser[i].iNext;
1722 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1723 pUser[i].iUser = iUser;
1724 pUser[i].iUserTable = iUserTable;
1725 pPage->iUserHead = i;
1726
1727 /*
1728 * Insert into cache and enable monitoring of the guest page if enabled.
1729 *
1730 * Until we implement caching of all levels, including the CR3 one, we'll
1731 * have to make sure we don't try monitor & cache any recursive reuse of
1732 * a monitored CR3 page. Because all windows versions are doing this we'll
1733 * have to be able to do combined access monitoring, CR3 + PT and
1734 * PD + PT (guest PAE).
1735 *
1736 * Update:
1737 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1738 */
1739#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1740# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1741 const bool fCanBeMonitored = true;
1742# else
1743 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1744 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1745 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1746# endif
1747# ifdef PGMPOOL_WITH_CACHE
1748 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1749# endif
1750 if (fCanBeMonitored)
1751 {
1752# ifdef PGMPOOL_WITH_MONITORING
1753 rc = pgmPoolMonitorInsert(pPool, pPage);
1754 if (rc == VERR_PGM_POOL_CLEARED)
1755 {
1756 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1757# ifndef PGMPOOL_WITH_CACHE
1758 pgmPoolMonitorFlush(pPool, pPage);
1759 rc = VERR_PGM_POOL_FLUSHED;
1760# endif
1761 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1762 pUser[i].iNext = pPool->iUserFreeHead;
1763 pUser[i].iUser = NIL_PGMPOOL_IDX;
1764 pPool->iUserFreeHead = i;
1765 }
1766 }
1767# endif
1768#endif /* PGMPOOL_WITH_MONITORING */
1769 return rc;
1770}
1771
1772
1773# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1774/**
1775 * Adds a user reference to a page.
1776 *
1777 * This will
1778 * This will move the page to the head of the
1779 *
1780 * @returns VBox status code.
1781 * @retval VINF_SUCCESS if successfully added.
1782 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1783 * @param pPool The pool.
1784 * @param pPage The cached page.
1785 * @param iUser The user index.
1786 * @param iUserTable The user table.
1787 */
1788static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1789{
1790 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1791
1792# ifdef VBOX_STRICT
1793 /*
1794 * Check that the entry doesn't already exists.
1795 */
1796 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1797 {
1798 uint16_t i = pPage->iUserHead;
1799 do
1800 {
1801 Assert(i < pPool->cMaxUsers);
1802 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1803 i = paUsers[i].iNext;
1804 } while (i != NIL_PGMPOOL_USER_INDEX);
1805 }
1806# endif
1807
1808 /*
1809 * Allocate a user node.
1810 */
1811 uint16_t i = pPool->iUserFreeHead;
1812 if (i == NIL_PGMPOOL_USER_INDEX)
1813 {
1814 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1815 if (VBOX_FAILURE(rc))
1816 return rc;
1817 i = pPool->iUserFreeHead;
1818 }
1819 pPool->iUserFreeHead = paUsers[i].iNext;
1820
1821 /*
1822 * Initialize the user node and insert it.
1823 */
1824 paUsers[i].iNext = pPage->iUserHead;
1825 paUsers[i].iUser = iUser;
1826 paUsers[i].iUserTable = iUserTable;
1827 pPage->iUserHead = i;
1828
1829# ifdef PGMPOOL_WITH_CACHE
1830 /*
1831 * Tell the cache to update its replacement stats for this page.
1832 */
1833 pgmPoolCacheUsed(pPool, pPage);
1834# endif
1835 return VINF_SUCCESS;
1836}
1837# endif /* PGMPOOL_WITH_CACHE */
1838
1839
1840/**
1841 * Frees a user record associated with a page.
1842 *
1843 * This does not clear the entry in the user table, it simply replaces the
1844 * user record to the chain of free records.
1845 *
1846 * @param pPool The pool.
1847 * @param HCPhys The HC physical address of the shadow page.
1848 * @param iUser The shadow page pool index of the user table.
1849 * @param iUserTable The index into the user table (shadowed).
1850 */
1851static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1852{
1853 /*
1854 * Unlink and free the specified user entry.
1855 */
1856 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1857
1858 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1859 uint16_t i = pPage->iUserHead;
1860 if ( i != NIL_PGMPOOL_USER_INDEX
1861 && paUsers[i].iUser == iUser
1862 && paUsers[i].iUserTable == iUserTable)
1863 {
1864 pPage->iUserHead = paUsers[i].iNext;
1865
1866 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1867 paUsers[i].iNext = pPool->iUserFreeHead;
1868 pPool->iUserFreeHead = i;
1869 return;
1870 }
1871
1872 /* General: Linear search. */
1873 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1874 while (i != NIL_PGMPOOL_USER_INDEX)
1875 {
1876 if ( paUsers[i].iUser == iUser
1877 && paUsers[i].iUserTable == iUserTable)
1878 {
1879 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1880 paUsers[iPrev].iNext = paUsers[i].iNext;
1881 else
1882 pPage->iUserHead = paUsers[i].iNext;
1883
1884 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1885 paUsers[i].iNext = pPool->iUserFreeHead;
1886 pPool->iUserFreeHead = i;
1887 return;
1888 }
1889 iPrev = i;
1890 i = paUsers[i].iNext;
1891 }
1892
1893 /* Fatal: didn't find it */
1894 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1895 iUser, iUserTable, pPage->GCPhys));
1896}
1897
1898
1899/**
1900 * Gets the entry size of a shadow table.
1901 *
1902 * @param enmKind The kind of page.
1903 *
1904 * @returns The size of the entry in bytes. That is, 4 or 8.
1905 * @returns If the kind is not for a table, an assertion is raised and 0 is
1906 * returned.
1907 */
1908DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1909{
1910 switch (enmKind)
1911 {
1912 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1913 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1914 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1915 case PGMPOOLKIND_ROOT_32BIT_PD:
1916 return 4;
1917
1918 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1919 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1920 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1921 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1922 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1923 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1924 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1925 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1926 case PGMPOOLKIND_ROOT_PAE_PD:
1927 case PGMPOOLKIND_ROOT_PDPTR:
1928 case PGMPOOLKIND_ROOT_PML4:
1929 return 8;
1930
1931 default:
1932 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1933 }
1934}
1935
1936
1937/**
1938 * Gets the entry size of a guest table.
1939 *
1940 * @param enmKind The kind of page.
1941 *
1942 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1943 * @returns If the kind is not for a table, an assertion is raised and 0 is
1944 * returned.
1945 */
1946DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1947{
1948 switch (enmKind)
1949 {
1950 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1951 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1952 case PGMPOOLKIND_ROOT_32BIT_PD:
1953 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1954 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1955 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1956 return 4;
1957
1958 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1959 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1960 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1961 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1962 case PGMPOOLKIND_ROOT_PAE_PD:
1963 case PGMPOOLKIND_ROOT_PDPTR:
1964 case PGMPOOLKIND_ROOT_PML4:
1965 return 8;
1966
1967 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1968 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1969 /** @todo can we return 0? (nobody is calling this...) */
1970 return 0;
1971
1972 default:
1973 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1974 }
1975}
1976
1977
1978#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1979/**
1980 * Scans one shadow page table for mappings of a physical page.
1981 *
1982 * @param pVM The VM handle.
1983 * @param pPhysPage The guest page in question.
1984 * @param iShw The shadow page table.
1985 * @param cRefs The number of references made in that PT.
1986 */
1987static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
1988{
1989 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
1990 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1991
1992 /*
1993 * Assert sanity.
1994 */
1995 Assert(cRefs == 1);
1996 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
1997 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
1998
1999 /*
2000 * Then, clear the actual mappings to the page in the shadow PT.
2001 */
2002 switch (pPage->enmKind)
2003 {
2004 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2005 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2006 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2007 {
2008 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2009 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2010 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2011 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2012 {
2013 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2014 pPT->a[i].u = 0;
2015 cRefs--;
2016 if (!cRefs)
2017 return;
2018 }
2019#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2020 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2021 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2022 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2023 {
2024 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2025 pPT->a[i].u = 0;
2026 }
2027#endif
2028 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2029 break;
2030 }
2031
2032 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2033 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2034 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2035 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2036 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2037 {
2038 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2039 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2040 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2041 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2042 {
2043 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2044 pPT->a[i].u = 0;
2045 cRefs--;
2046 if (!cRefs)
2047 return;
2048 }
2049#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2050 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2051 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2052 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2053 {
2054 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2055 pPT->a[i].u = 0;
2056 }
2057#endif
2058 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2059 break;
2060 }
2061
2062 default:
2063 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2064 }
2065}
2066
2067
2068/**
2069 * Scans one shadow page table for mappings of a physical page.
2070 *
2071 * @param pVM The VM handle.
2072 * @param pPhysPage The guest page in question.
2073 * @param iShw The shadow page table.
2074 * @param cRefs The number of references made in that PT.
2075 */
2076void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2077{
2078 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2079 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2080 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2081 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2082 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2083 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2084}
2085
2086
2087/**
2088 * Flushes a list of shadow page tables mapping the same physical page.
2089 *
2090 * @param pVM The VM handle.
2091 * @param pPhysPage The guest page in question.
2092 * @param iPhysExt The physical cross reference extent list to flush.
2093 */
2094void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2095{
2096 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2097 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2098 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2099
2100 const uint16_t iPhysExtStart = iPhysExt;
2101 PPGMPOOLPHYSEXT pPhysExt;
2102 do
2103 {
2104 Assert(iPhysExt < pPool->cMaxPhysExts);
2105 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2106 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2107 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2108 {
2109 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2110 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2111 }
2112
2113 /* next */
2114 iPhysExt = pPhysExt->iNext;
2115 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2116
2117 /* insert the list into the free list and clear the ram range entry. */
2118 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2119 pPool->iPhysExtFreeHead = iPhysExtStart;
2120 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2121
2122 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2123}
2124#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2125
2126
2127/**
2128 * Scans all shadow page tables for mappings of a physical page.
2129 *
2130 * This may be slow, but it's most likely more efficient than cleaning
2131 * out the entire page pool / cache.
2132 *
2133 * @returns VBox status code.
2134 * @retval VINF_SUCCESS if all references has been successfully cleared.
2135 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2136 * a page pool cleaning.
2137 *
2138 * @param pVM The VM handle.
2139 * @param pPhysPage The guest page in question.
2140 */
2141int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2142{
2143 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2144 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2145 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2146 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2147
2148#if 1
2149 /*
2150 * There is a limit to what makes sense.
2151 */
2152 if (pPool->cPresent > 1024)
2153 {
2154 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2155 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2156 return VINF_PGM_GCPHYS_ALIASED;
2157 }
2158#endif
2159
2160 /*
2161 * Iterate all the pages until we've encountered all that in use.
2162 * This is simple but not quite optimal solution.
2163 */
2164 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2165 const uint32_t u32 = u64;
2166 unsigned cLeft = pPool->cUsedPages;
2167 unsigned iPage = pPool->cCurPages;
2168 while (--iPage >= PGMPOOL_IDX_FIRST)
2169 {
2170 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2171 if (pPage->GCPhys != NIL_RTGCPHYS)
2172 {
2173 switch (pPage->enmKind)
2174 {
2175 /*
2176 * We only care about shadow page tables.
2177 */
2178 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2179 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2180 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2181 {
2182 unsigned cPresent = pPage->cPresent;
2183 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2184 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2185 if (pPT->a[i].n.u1Present)
2186 {
2187 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2188 {
2189 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2190 pPT->a[i].u = 0;
2191 }
2192 if (!--cPresent)
2193 break;
2194 }
2195 break;
2196 }
2197
2198 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2199 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2200 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2201 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2202 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2203 {
2204 unsigned cPresent = pPage->cPresent;
2205 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2206 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2207 if (pPT->a[i].n.u1Present)
2208 {
2209 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2210 {
2211 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2212 pPT->a[i].u = 0;
2213 }
2214 if (!--cPresent)
2215 break;
2216 }
2217 break;
2218 }
2219 }
2220 if (!--cLeft)
2221 break;
2222 }
2223 }
2224
2225 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2226 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2227 return VINF_SUCCESS;
2228}
2229
2230
2231/**
2232 * Clears the user entry in a user table.
2233 *
2234 * This is used to remove all references to a page when flushing it.
2235 */
2236static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2237{
2238 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2239 Assert(pUser->iUser < pPool->cCurPages);
2240
2241 /*
2242 * Map the user page.
2243 */
2244 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2245 union
2246 {
2247 uint64_t *pau64;
2248 uint32_t *pau32;
2249 } u;
2250 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2251
2252#ifdef VBOX_STRICT
2253 /*
2254 * Some sanity checks.
2255 */
2256 switch (pUserPage->enmKind)
2257 {
2258 case PGMPOOLKIND_ROOT_32BIT_PD:
2259 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2260 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2261 break;
2262 case PGMPOOLKIND_ROOT_PAE_PD:
2263 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2264 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2265 break;
2266 case PGMPOOLKIND_ROOT_PDPTR:
2267 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2268 Assert(pUser->iUserTable < 4);
2269 break;
2270 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2271 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2272 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2273 break;
2274 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2275 case PGMPOOLKIND_ROOT_PML4:
2276 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2277 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2278 break;
2279 default:
2280 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2281 break;
2282 }
2283#endif /* VBOX_STRICT */
2284
2285 /*
2286 * Clear the entry in the user page.
2287 */
2288 switch (pUserPage->enmKind)
2289 {
2290 /* 32-bit entries */
2291 case PGMPOOLKIND_ROOT_32BIT_PD:
2292 u.pau32[pUser->iUserTable] = 0;
2293 break;
2294
2295 /* 64-bit entries */
2296 case PGMPOOLKIND_ROOT_PAE_PD:
2297 case PGMPOOLKIND_ROOT_PDPTR:
2298 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2299 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2300 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2301 case PGMPOOLKIND_ROOT_PML4:
2302 u.pau64[pUser->iUserTable] = 0;
2303 break;
2304
2305 default:
2306 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2307 }
2308}
2309
2310
2311/**
2312 * Clears all users of a page.
2313 */
2314static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2315{
2316 /*
2317 * Free all the user records.
2318 */
2319 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2320 uint16_t i = pPage->iUserHead;
2321 while (i != NIL_PGMPOOL_USER_INDEX)
2322 {
2323 /* Clear enter in user table. */
2324 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2325
2326 /* Free it. */
2327 const uint16_t iNext = paUsers[i].iNext;
2328 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2329 paUsers[i].iNext = pPool->iUserFreeHead;
2330 pPool->iUserFreeHead = i;
2331
2332 /* Next. */
2333 i = iNext;
2334 }
2335 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2336}
2337
2338
2339#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2340/**
2341 * Allocates a new physical cross reference extent.
2342 *
2343 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2344 * @param pVM The VM handle.
2345 * @param piPhysExt Where to store the phys ext index.
2346 */
2347PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2348{
2349 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2350 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2351 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2352 {
2353 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2354 return NULL;
2355 }
2356 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2357 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2358 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2359 *piPhysExt = iPhysExt;
2360 return pPhysExt;
2361}
2362
2363
2364/**
2365 * Frees a physical cross reference extent.
2366 *
2367 * @param pVM The VM handle.
2368 * @param iPhysExt The extent to free.
2369 */
2370void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2371{
2372 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2373 Assert(iPhysExt < pPool->cMaxPhysExts);
2374 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2375 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2376 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2377 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2378 pPool->iPhysExtFreeHead = iPhysExt;
2379}
2380
2381
2382/**
2383 * Frees a physical cross reference extent.
2384 *
2385 * @param pVM The VM handle.
2386 * @param iPhysExt The extent to free.
2387 */
2388void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2389{
2390 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2391
2392 const uint16_t iPhysExtStart = iPhysExt;
2393 PPGMPOOLPHYSEXT pPhysExt;
2394 do
2395 {
2396 Assert(iPhysExt < pPool->cMaxPhysExts);
2397 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2398 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2399 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2400
2401 /* next */
2402 iPhysExt = pPhysExt->iNext;
2403 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2404
2405 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2406 pPool->iPhysExtFreeHead = iPhysExtStart;
2407}
2408
2409/**
2410 * Insert a reference into a list of physical cross reference extents.
2411 *
2412 * @returns The new ram range flags (top 16-bits).
2413 *
2414 * @param pVM The VM handle.
2415 * @param iPhysExt The physical extent index of the list head.
2416 * @param iShwPT The shadow page table index.
2417 *
2418 */
2419static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2420{
2421 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2422 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2423
2424 /* special common case. */
2425 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2426 {
2427 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2428 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2429 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2430 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2431 }
2432
2433 /* general treatment. */
2434 const uint16_t iPhysExtStart = iPhysExt;
2435 unsigned cMax = 15;
2436 for (;;)
2437 {
2438 Assert(iPhysExt < pPool->cMaxPhysExts);
2439 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2440 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2441 {
2442 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2443 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2444 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2445 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2446 }
2447 if (!--cMax)
2448 {
2449 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2450 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2451 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2452 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2453 }
2454 }
2455
2456 /* add another extent to the list. */
2457 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2458 if (!pNew)
2459 {
2460 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2461 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2462 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2463 }
2464 pNew->iNext = iPhysExtStart;
2465 pNew->aidx[0] = iShwPT;
2466 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2467 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2468}
2469
2470
2471/**
2472 * Add a reference to guest physical page where extents are in use.
2473 *
2474 * @returns The new ram range flags (top 16-bits).
2475 *
2476 * @param pVM The VM handle.
2477 * @param u16 The ram range flags (top 16-bits).
2478 * @param iShwPT The shadow page table index.
2479 */
2480uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2481{
2482 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2483 {
2484 /*
2485 * Convert to extent list.
2486 */
2487 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2488 uint16_t iPhysExt;
2489 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2490 if (pPhysExt)
2491 {
2492 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2493 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2494 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2495 pPhysExt->aidx[1] = iShwPT;
2496 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2497 }
2498 else
2499 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2500 }
2501 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2502 {
2503 /*
2504 * Insert into the extent list.
2505 */
2506 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2507 }
2508 else
2509 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2510 return u16;
2511}
2512
2513
2514/**
2515 * Clear references to guest physical memory.
2516 *
2517 * @param pPool The pool.
2518 * @param pPage The page.
2519 * @param pPhysPage Pointer to the aPages entry in the ram range.
2520 */
2521void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2522{
2523 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2524 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2525
2526 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2527 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2528 {
2529 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2530 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2531 do
2532 {
2533 Assert(iPhysExt < pPool->cMaxPhysExts);
2534
2535 /*
2536 * Look for the shadow page and check if it's all freed.
2537 */
2538 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2539 {
2540 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2541 {
2542 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2543
2544 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2545 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2546 {
2547 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2548 return;
2549 }
2550
2551 /* we can free the node. */
2552 PVM pVM = pPool->CTXSUFF(pVM);
2553 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2554 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2555 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2556 {
2557 /* lonely node */
2558 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2559 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2560 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2561 }
2562 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2563 {
2564 /* head */
2565 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2566 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2567 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2568 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2569 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2570 }
2571 else
2572 {
2573 /* in list */
2574 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2575 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2576 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2577 }
2578 iPhysExt = iPhysExtNext;
2579 return;
2580 }
2581 }
2582
2583 /* next */
2584 iPhysExtPrev = iPhysExt;
2585 iPhysExt = paPhysExts[iPhysExt].iNext;
2586 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2587
2588 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2589 }
2590 else /* nothing to do */
2591 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2592}
2593
2594
2595
2596/**
2597 * Clear references to guest physical memory.
2598 *
2599 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2600 * is assumed to be correct, so the linear search can be skipped and we can assert
2601 * at an earlier point.
2602 *
2603 * @param pPool The pool.
2604 * @param pPage The page.
2605 * @param HCPhys The host physical address corresponding to the guest page.
2606 * @param GCPhys The guest physical address corresponding to HCPhys.
2607 */
2608static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2609{
2610 /*
2611 * Walk range list.
2612 */
2613 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2614 while (pRam)
2615 {
2616 RTGCPHYS off = GCPhys - pRam->GCPhys;
2617 if (off < pRam->cb)
2618 {
2619 /* does it match? */
2620 const unsigned iPage = off >> PAGE_SHIFT;
2621 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2622 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2623 {
2624 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2625 return;
2626 }
2627 break;
2628 }
2629 pRam = CTXSUFF(pRam->pNext);
2630 }
2631 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2632}
2633
2634
2635/**
2636 * Clear references to guest physical memory.
2637 *
2638 * @param pPool The pool.
2639 * @param pPage The page.
2640 * @param HCPhys The host physical address corresponding to the guest page.
2641 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
2642 */
2643static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2644{
2645 /*
2646 * Walk range list.
2647 */
2648 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2649 while (pRam)
2650 {
2651 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2652 if (off < pRam->cb)
2653 {
2654 /* does it match? */
2655 const unsigned iPage = off >> PAGE_SHIFT;
2656 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2657 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2658 {
2659 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2660 return;
2661 }
2662 break;
2663 }
2664 pRam = CTXSUFF(pRam->pNext);
2665 }
2666
2667 /*
2668 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2669 */
2670 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2671 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2672 while (pRam)
2673 {
2674 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2675 while (iPage-- > 0)
2676 {
2677 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2678 {
2679 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2680 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2681 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2682 return;
2683 }
2684 }
2685 pRam = CTXSUFF(pRam->pNext);
2686 }
2687
2688 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2689}
2690
2691
2692/**
2693 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2694 *
2695 * @param pPool The pool.
2696 * @param pPage The page.
2697 * @param pShwPT The shadow page table (mapping of the page).
2698 * @param pGstPT The guest page table.
2699 */
2700DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2701{
2702 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2703 if (pShwPT->a[i].n.u1Present)
2704 {
2705 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2706 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2707 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2708 if (!--pPage->cPresent)
2709 break;
2710 }
2711}
2712
2713
2714/**
2715 * Clear references to guest physical memory in a PAE / 32-bit page table.
2716 *
2717 * @param pPool The pool.
2718 * @param pPage The page.
2719 * @param pShwPT The shadow page table (mapping of the page).
2720 * @param pGstPT The guest page table (just a half one).
2721 */
2722DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2723{
2724 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2725 if (pShwPT->a[i].n.u1Present)
2726 {
2727 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2728 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2729 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2730 }
2731}
2732
2733
2734/**
2735 * Clear references to guest physical memory in a PAE / PAE page table.
2736 *
2737 * @param pPool The pool.
2738 * @param pPage The page.
2739 * @param pShwPT The shadow page table (mapping of the page).
2740 * @param pGstPT The guest page table.
2741 */
2742DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2743{
2744 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2745 if (pShwPT->a[i].n.u1Present)
2746 {
2747 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
2748 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2749 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2750 }
2751}
2752
2753
2754/**
2755 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2756 *
2757 * @param pPool The pool.
2758 * @param pPage The page.
2759 * @param pShwPT The shadow page table (mapping of the page).
2760 */
2761DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2762{
2763 RTGCPHYS GCPhys = pPage->GCPhys;
2764 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2765 if (pShwPT->a[i].n.u1Present)
2766 {
2767 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2768 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2769 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2770 }
2771}
2772
2773
2774/**
2775 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2776 *
2777 * @param pPool The pool.
2778 * @param pPage The page.
2779 * @param pShwPT The shadow page table (mapping of the page).
2780 */
2781DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2782{
2783 RTGCPHYS GCPhys = pPage->GCPhys;
2784 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2785 if (pShwPT->a[i].n.u1Present)
2786 {
2787 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2788 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2789 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2790 }
2791}
2792#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2793
2794
2795/**
2796 * Clear references to shadowed pages in a PAE page directory.
2797 *
2798 * @param pPool The pool.
2799 * @param pPage The page.
2800 * @param pShwPD The shadow page directory (mapping of the page).
2801 */
2802DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2803{
2804 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2805 {
2806 if (pShwPD->a[i].n.u1Present)
2807 {
2808 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2809 if (pSubPage)
2810 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2811 else
2812 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2813 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2814 }
2815 }
2816}
2817
2818
2819/**
2820 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2821 *
2822 * @param pPool The pool.
2823 * @param pPage The page.
2824 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2825 */
2826DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2827{
2828 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2829 {
2830 if (pShwPdPtr->a[i].n.u1Present)
2831 {
2832 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2833 if (pSubPage)
2834 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2835 else
2836 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2837 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2838 }
2839 }
2840}
2841
2842
2843/**
2844 * Clears all references made by this page.
2845 *
2846 * This includes other shadow pages and GC physical addresses.
2847 *
2848 * @param pPool The pool.
2849 * @param pPage The page.
2850 */
2851static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2852{
2853 /*
2854 * Map the shadow page and take action according to the page kind.
2855 */
2856 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2857 switch (pPage->enmKind)
2858 {
2859#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2860 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2861 {
2862 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2863 void *pvGst;
2864 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2865 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2866 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2867 break;
2868 }
2869
2870 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2871 {
2872 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2873 void *pvGst;
2874 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2875 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2876 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2877 break;
2878 }
2879
2880 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2881 {
2882 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2883 void *pvGst;
2884 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2885 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2886 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2887 break;
2888 }
2889
2890 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2891 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2892 {
2893 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2894 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2895 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2896 break;
2897 }
2898
2899 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2900 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2901 {
2902 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2903 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2904 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2905 break;
2906 }
2907
2908#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2909 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2910 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2911 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2912 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2913 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2914 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2915 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2916 break;
2917#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2918
2919 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2920 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2921 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2922 break;
2923
2924 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2925 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2926 break;
2927
2928 default:
2929 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2930 }
2931
2932 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
2933 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2934 ASMMemZeroPage(pvShw);
2935 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2936 pPage->fZeroed = true;
2937}
2938#endif /* PGMPOOL_WITH_USER_TRACKING */
2939
2940
2941/**
2942 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2943 *
2944 * @param pPool The pool.
2945 */
2946static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2947{
2948 /*
2949 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2950 */
2951 Assert(NIL_PGMPOOL_IDX == 0);
2952 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2953 {
2954 /*
2955 * Get the page address.
2956 */
2957 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2958 union
2959 {
2960 uint64_t *pau64;
2961 uint32_t *pau32;
2962 } u;
2963 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2964
2965 /*
2966 * Mark stuff not present.
2967 */
2968 switch (pPage->enmKind)
2969 {
2970 case PGMPOOLKIND_ROOT_32BIT_PD:
2971 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2972 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2973 u.pau32[iPage] = 0;
2974 break;
2975
2976 case PGMPOOLKIND_ROOT_PAE_PD:
2977 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2978 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2979 u.pau64[iPage] = 0;
2980 break;
2981
2982 case PGMPOOLKIND_ROOT_PML4:
2983 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2984 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2985 u.pau64[iPage] = 0;
2986 break;
2987
2988 case PGMPOOLKIND_ROOT_PDPTR:
2989 /* Not root of shadowed pages currently, ignore it. */
2990 break;
2991 }
2992 }
2993
2994 /*
2995 * Paranoia (to be removed), flag a global CR3 sync.
2996 */
2997 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
2998}
2999
3000
3001/**
3002 * Flushes the entire cache.
3003 *
3004 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3005 * and execute this CR3 flush.
3006 *
3007 * @param pPool The pool.
3008 */
3009static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3010{
3011 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3012 LogFlow(("pgmPoolFlushAllInt:\n"));
3013
3014 /*
3015 * If there are no pages in the pool, there is nothing to do.
3016 */
3017 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3018 {
3019 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3020 return;
3021 }
3022
3023 /*
3024 * Nuke the free list and reinsert all pages into it.
3025 */
3026 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3027 {
3028 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3029
3030#ifdef IN_RING3
3031 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3032#endif
3033#ifdef PGMPOOL_WITH_MONITORING
3034 if (pPage->fMonitored)
3035 pgmPoolMonitorFlush(pPool, pPage);
3036 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3037 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3038 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3039 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3040 pPage->cModifications = 0;
3041#endif
3042 pPage->GCPhys = NIL_RTGCPHYS;
3043 pPage->enmKind = PGMPOOLKIND_FREE;
3044 Assert(pPage->idx == i);
3045 pPage->iNext = i + 1;
3046 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3047 pPage->fSeenNonGlobal = false;
3048 pPage->fMonitored= false;
3049 pPage->fCached = false;
3050 pPage->fReusedFlushPending = false;
3051 pPage->fCR3Mix = false;
3052#ifdef PGMPOOL_WITH_USER_TRACKING
3053 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3054#endif
3055#ifdef PGMPOOL_WITH_CACHE
3056 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3057 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3058#endif
3059 }
3060 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3061 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3062 pPool->cUsedPages = 0;
3063
3064#ifdef PGMPOOL_WITH_USER_TRACKING
3065 /*
3066 * Zap and reinitialize the user records.
3067 */
3068 pPool->cPresent = 0;
3069 pPool->iUserFreeHead = 0;
3070 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3071 const unsigned cMaxUsers = pPool->cMaxUsers;
3072 for (unsigned i = 0; i < cMaxUsers; i++)
3073 {
3074 paUsers[i].iNext = i + 1;
3075 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3076 paUsers[i].iUserTable = 0xfffe;
3077 }
3078 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3079#endif
3080
3081#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3082 /*
3083 * Clear all the GCPhys links and rebuild the phys ext free list.
3084 */
3085 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3086 pRam;
3087 pRam = pRam->CTXSUFF(pNext))
3088 {
3089 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3090 while (iPage-- > 0)
3091 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3092 }
3093
3094 pPool->iPhysExtFreeHead = 0;
3095 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3096 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3097 for (unsigned i = 0; i < cMaxPhysExts; i++)
3098 {
3099 paPhysExts[i].iNext = i + 1;
3100 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3101 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3102 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3103 }
3104 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3105#endif
3106
3107#ifdef PGMPOOL_WITH_MONITORING
3108 /*
3109 * Just zap the modified list.
3110 */
3111 pPool->cModifiedPages = 0;
3112 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3113#endif
3114
3115#ifdef PGMPOOL_WITH_CACHE
3116 /*
3117 * Clear the GCPhys hash and the age list.
3118 */
3119 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3120 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3121 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3122 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3123#endif
3124
3125 /*
3126 * Flush all the special root pages.
3127 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3128 */
3129 pgmPoolFlushAllSpecialRoots(pPool);
3130 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3131 {
3132 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3133 pPage->iNext = NIL_PGMPOOL_IDX;
3134#ifdef PGMPOOL_WITH_MONITORING
3135 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3136 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3137 pPage->cModifications = 0;
3138 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3139 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3140 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3141 if (pPage->fMonitored)
3142 {
3143 PVM pVM = pPool->CTXSUFF(pVM);
3144 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3145 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3146 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3147 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3148 pPool->pszAccessHandler);
3149 AssertFatalRCSuccess(rc);
3150# ifdef PGMPOOL_WITH_CACHE
3151 pgmPoolHashInsert(pPool, pPage);
3152# endif
3153 }
3154#endif
3155#ifdef PGMPOOL_WITH_USER_TRACKING
3156 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3157#endif
3158#ifdef PGMPOOL_WITH_CACHE
3159 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3160 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3161#endif
3162 }
3163
3164 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3165}
3166
3167
3168/**
3169 * Flushes a pool page.
3170 *
3171 * This moves the page to the free list after removing all user references to it.
3172 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3173 *
3174 * @returns VBox status code.
3175 * @retval VINF_SUCCESS on success.
3176 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3177 * @param pPool The pool.
3178 * @param HCPhys The HC physical address of the shadow page.
3179 */
3180int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3181{
3182 int rc = VINF_SUCCESS;
3183 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3184 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3185 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3186
3187 /*
3188 * Quietly reject any attempts at flushing any of the special root pages.
3189 */
3190 if (pPage->idx < PGMPOOL_IDX_FIRST)
3191 {
3192 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3193 return VINF_SUCCESS;
3194 }
3195
3196 /*
3197 * Mark the page as being in need of a ASMMemZeroPage().
3198 */
3199 pPage->fZeroed = false;
3200
3201#ifdef PGMPOOL_WITH_USER_TRACKING
3202 /*
3203 * Clear the page.
3204 */
3205 pgmPoolTrackClearPageUsers(pPool, pPage);
3206 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3207 pgmPoolTrackDeref(pPool, pPage);
3208 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3209#endif
3210
3211#ifdef PGMPOOL_WITH_CACHE
3212 /*
3213 * Flush it from the cache.
3214 */
3215 pgmPoolCacheFlushPage(pPool, pPage);
3216#endif /* PGMPOOL_WITH_CACHE */
3217
3218#ifdef PGMPOOL_WITH_MONITORING
3219 /*
3220 * Deregistering the monitoring.
3221 */
3222 if (pPage->fMonitored)
3223 rc = pgmPoolMonitorFlush(pPool, pPage);
3224#endif
3225
3226 /*
3227 * Free the page.
3228 */
3229 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3230 pPage->iNext = pPool->iFreeHead;
3231 pPool->iFreeHead = pPage->idx;
3232 pPage->enmKind = PGMPOOLKIND_FREE;
3233 pPage->GCPhys = NIL_RTGCPHYS;
3234 pPage->fReusedFlushPending = false;
3235
3236 pPool->cUsedPages--;
3237 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3238 return rc;
3239}
3240
3241
3242/**
3243 * Frees a usage of a pool page.
3244 *
3245 * The caller is responsible to updating the user table so that it no longer
3246 * references the shadow page.
3247 *
3248 * @param pPool The pool.
3249 * @param HCPhys The HC physical address of the shadow page.
3250 * @param iUser The shadow page pool index of the user table.
3251 * @param iUserTable The index into the user table (shadowed).
3252 */
3253void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3254{
3255 STAM_PROFILE_START(&pPool->StatFree, a);
3256 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3257 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3258 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3259#ifdef PGMPOOL_WITH_USER_TRACKING
3260 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3261#endif
3262#ifdef PGMPOOL_WITH_CACHE
3263 if (!pPage->fCached)
3264#endif
3265 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3266 STAM_PROFILE_STOP(&pPool->StatFree, a);
3267}
3268
3269
3270/**
3271 * Makes one or more free page free.
3272 *
3273 * @returns VBox status code.
3274 * @retval VINF_SUCCESS on success.
3275 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3276 *
3277 * @param pPool The pool.
3278 * @param iUser The user of the page.
3279 */
3280static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3281{
3282 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3283
3284 /*
3285 * If the pool isn't full grown yet, expand it.
3286 */
3287 if (pPool->cCurPages < pPool->cMaxPages)
3288 {
3289 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3290#ifdef IN_RING3
3291 int rc = PGMR3PoolGrow(pPool->pVMHC);
3292#else
3293 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3294#endif
3295 if (VBOX_FAILURE(rc))
3296 return rc;
3297 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3298 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3299 return VINF_SUCCESS;
3300 }
3301
3302#ifdef PGMPOOL_WITH_CACHE
3303 /*
3304 * Free one cached page.
3305 */
3306 return pgmPoolCacheFreeOne(pPool, iUser);
3307#else
3308 /*
3309 * Flush the pool.
3310 * If we have tracking enabled, it should be possible to come up with
3311 * a cheap replacement strategy...
3312 */
3313 pgmPoolFlushAllInt(pPool);
3314 return VERR_PGM_POOL_FLUSHED;
3315#endif
3316}
3317
3318
3319/**
3320 * Allocates a page from the pool.
3321 *
3322 * This page may actually be a cached page and not in need of any processing
3323 * on the callers part.
3324 *
3325 * @returns VBox status code.
3326 * @retval VINF_SUCCESS if a NEW page was allocated.
3327 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3328 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3329 * @param pVM The VM handle.
3330 * @param GCPhys The GC physical address of the page we're gonna shadow.
3331 * For 4MB and 2MB PD entries, it's the first address the
3332 * shadow PT is covering.
3333 * @param enmKind The kind of mapping.
3334 * @param iUser The shadow page pool index of the user table.
3335 * @param iUserTable The index into the user table (shadowed).
3336 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3337 */
3338int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3339{
3340 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3341 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3342 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3343
3344 *ppPage = NULL;
3345
3346#ifdef PGMPOOL_WITH_CACHE
3347 if (pPool->fCacheEnabled)
3348 {
3349 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3350 if (VBOX_SUCCESS(rc2))
3351 {
3352 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3353 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3354 return rc2;
3355 }
3356 }
3357#endif
3358
3359 /*
3360 * Allocate a new one.
3361 */
3362 int rc = VINF_SUCCESS;
3363 uint16_t iNew = pPool->iFreeHead;
3364 if (iNew == NIL_PGMPOOL_IDX)
3365 {
3366 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3367 if (VBOX_FAILURE(rc))
3368 {
3369 if (rc != VERR_PGM_POOL_CLEARED)
3370 {
3371 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3372 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3373 return rc;
3374 }
3375 rc = VERR_PGM_POOL_FLUSHED;
3376 }
3377 iNew = pPool->iFreeHead;
3378 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3379 }
3380
3381 /* unlink the free head */
3382 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3383 pPool->iFreeHead = pPage->iNext;
3384 pPage->iNext = NIL_PGMPOOL_IDX;
3385
3386 /*
3387 * Initialize it.
3388 */
3389 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3390 pPage->enmKind = enmKind;
3391 pPage->GCPhys = GCPhys;
3392 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3393 pPage->fMonitored = false;
3394 pPage->fCached = false;
3395 pPage->fReusedFlushPending = false;
3396 pPage->fCR3Mix = false;
3397#ifdef PGMPOOL_WITH_MONITORING
3398 pPage->cModifications = 0;
3399 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3400 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3401#endif
3402#ifdef PGMPOOL_WITH_USER_TRACKING
3403 pPage->cPresent = 0;
3404 pPage->iFirstPresent = ~0;
3405
3406 /*
3407 * Insert into the tracking and cache. If this fails, free the page.
3408 */
3409 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3410 if (VBOX_FAILURE(rc3))
3411 {
3412 if (rc3 != VERR_PGM_POOL_CLEARED)
3413 {
3414 pPool->cUsedPages--;
3415 pPage->enmKind = PGMPOOLKIND_FREE;
3416 pPage->GCPhys = NIL_RTGCPHYS;
3417 pPage->iNext = pPool->iFreeHead;
3418 pPool->iFreeHead = pPage->idx;
3419 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3420 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3421 return rc3;
3422 }
3423 rc = VERR_PGM_POOL_FLUSHED;
3424 }
3425#endif /* PGMPOOL_WITH_USER_TRACKING */
3426
3427 /*
3428 * Commit the allocation, clear the page and return.
3429 */
3430#ifdef VBOX_WITH_STATISTICS
3431 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3432 pPool->cUsedPagesHigh = pPool->cUsedPages;
3433#endif
3434
3435 if (!pPage->fZeroed)
3436 {
3437 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3438 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3439 ASMMemZeroPage(pv);
3440 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3441 }
3442
3443 *ppPage = pPage;
3444 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3445 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3446 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3447 return rc;
3448}
3449
3450
3451/**
3452 * Frees a usage of a pool page.
3453 *
3454 * @param pVM The VM handle.
3455 * @param HCPhys The HC physical address of the shadow page.
3456 * @param iUser The shadow page pool index of the user table.
3457 * @param iUserTable The index into the user table (shadowed).
3458 */
3459void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3460{
3461 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3462 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3463 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3464}
3465
3466
3467/**
3468 * Gets a in-use page in the pool by it's physical address.
3469 *
3470 * @returns Pointer to the page.
3471 * @param pVM The VM handle.
3472 * @param HCPhys The HC physical address of the shadow page.
3473 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3474 */
3475PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3476{
3477 /** @todo profile this! */
3478 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3479 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3480 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3481 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3482 return pPage;
3483}
3484
3485
3486/**
3487 * Flushes the entire cache.
3488 *
3489 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3490 * and execute this CR3 flush.
3491 *
3492 * @param pPool The pool.
3493 */
3494void pgmPoolFlushAll(PVM pVM)
3495{
3496 LogFlow(("pgmPoolFlushAll:\n"));
3497 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3498}
3499
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette