VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 2623

Last change on this file since 2623 was 2297, checked in by vboxsync, 18 years ago

svn:eol-style native

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 119.8 KB
Line 
1/* $Id: PGMAllPool.cpp 2297 2007-04-20 23:51:13Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 return pVM->pgm.s.apGCPaePDs[0];
115 case PGMPOOL_IDX_PDPTR:
116 return pVM->pgm.s.pGCPaePDPTR;
117 case PGMPOOL_IDX_PML4:
118 return pVM->pgm.s.pGCPaePML4;
119 default:
120 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
121 return NULL;
122 }
123}
124#endif /* IN_GC */
125
126
127#ifdef PGMPOOL_WITH_MONITORING
128/**
129 * Determin the size of a write instruction.
130 * @returns number of bytes written.
131 * @param pDis The disassembler state.
132 */
133static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
134{
135 /*
136 * This is very crude and possibly wrong for some opcodes,
137 * but since it's not really supposed to be called we can
138 * probably live with that.
139 */
140 return DISGetParamSize(pDis, &pDis->param1);
141}
142
143
144/**
145 * Flushes a chain of pages sharing the same access monitor.
146 *
147 * @returns VBox status code suitable for scheduling.
148 * @param pPool The pool.
149 * @param pPage A page in the chain.
150 */
151int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
152{
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Itereate the list flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
179 rc = VINF_PGM_SYNC_CR3;
180 }
181 /* next */
182 if (idx == NIL_PGMPOOL_IDX)
183 break;
184 pPage = &pPool->aPages[idx];
185 }
186 return rc;
187}
188
189
190/**
191 * Wrapper for getting the current context pointer to the entry begin modified.
192 *
193 * @returns Pointer to the current context mapping of the entry.
194 * @param pPool The pool.
195 * @param pvFault The fault virtual address.
196 * @param GCPhysFault The fault physical address.
197 * @param cbEntry The entry size.
198 */
199#ifdef IN_RING3
200DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
201#else
202DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
203#endif
204{
205#ifdef IN_GC
206 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
207
208#elif defined(IN_RING0)
209 void *pvRet;
210 int rc = PGMRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
211 AssertFatalRCSuccess(rc);
212 return pvRet;
213
214#elif defined(IN_RING3)
215 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
216#else
217# error "huh?"
218#endif
219}
220
221
222/**
223 * Process shadow entries before they are changed by the guest.
224 *
225 * For PT entries we will clear them. For PD entries, we'll simply check
226 * for mapping conflicts and set the SyncCR3 FF if found.
227 *
228 * @param pPool The pool.
229 * @param pPage The head page.
230 * @param GCPhysFault The guest physical fault address.
231 * @param uAddress In R0 and GC this is the guest context fault address (flat).
232 * In R3 this is the host context 'fault' address.
233 * @param pCpu The disassembler state for figuring out the write size.
234 * This need not be specified if the caller knows we won't do cross entry accesses.
235 */
236#ifdef IN_RING3
237void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
238#else
239void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
240#endif
241{
242 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
243 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
244 for (;;)
245 {
246 union
247 {
248 void *pv;
249 PX86PT pPT;
250 PX86PTPAE pPTPae;
251 PX86PD pPD;
252 PX86PDPAE pPDPae;
253 } uShw;
254 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
255
256 switch (pPage->enmKind)
257 {
258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
259 {
260 const unsigned iShw = off / sizeof(X86PTE);
261 if (uShw.pPT->a[iShw].n.u1Present)
262 {
263# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
264 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
265 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
266 pgmPoolTracDerefGCPhysHint(pPool, pPage,
267 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
268 pGstPte->u & X86_PTE_PG_MASK);
269# endif
270 uShw.pPT->a[iShw].u = 0;
271 }
272 break;
273 }
274
275 /* page/2 sized */
276 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
277 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
278 {
279 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
280 if (uShw.pPTPae->a[iShw].n.u1Present)
281 {
282# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
283 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
284 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 pGstPte->u & X86_PTE_PG_MASK);
288# endif
289 uShw.pPTPae->a[iShw].u = 0;
290 }
291 }
292 break;
293
294 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
295 {
296 const unsigned iShw = off / sizeof(X86PTPAE);
297 if (uShw.pPTPae->a[iShw].n.u1Present)
298 {
299# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
300 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
301 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
302 pgmPoolTracDerefGCPhysHint(pPool, pPage,
303 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
304 pGstPte->u & X86_PTE_PAE_PG_MASK);
305# endif
306 uShw.pPTPae->a[iShw].u = 0;
307 }
308 break;
309 }
310
311 case PGMPOOLKIND_ROOT_32BIT_PD:
312 {
313 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
314 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
315 {
316 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
317 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
318 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
319 }
320 /* paranoia / a bit assumptive. */
321 else if ( pCpu
322 && (off & 4)
323 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
324 {
325 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
326 if ( iShw2 != iShw
327 && iShw2 < ELEMENTS(uShw.pPD->a)
328 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
329 {
330 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
331 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
332 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
333 }
334 }
335#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
336 if ( uShw.pPD->a[iShw].n.u1Present
337 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
338 {
339 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
340# ifdef IN_GC /* TLB load - we're pushing things a bit... */
341 ASMProbeReadByte(pvAddress);
342# endif
343 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
344 uShw.pPD->a[iShw].u = 0;
345 }
346#endif
347 break;
348 }
349
350 case PGMPOOLKIND_ROOT_PAE_PD:
351 {
352 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
353 for (unsigned i = 0; i < 2; i++, iShw++)
354 {
355 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
356 {
357 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
358 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
359 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
360 }
361 /* paranoia / a bit assumptive. */
362 else if ( pCpu
363 && (off & 4)
364 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
365 {
366 const unsigned iShw2 = iShw + 2;
367 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
368 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
369 {
370 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
371 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
372 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
373 }
374 }
375#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
376 if ( uShw.pPDPae->a[iShw].n.u1Present
377 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
378 {
379 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
380# ifdef IN_GC /* TLB load - we're pushing things a bit... */
381 ASMProbeReadByte(pvAddress);
382# endif
383 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
384 uShw.pPDPae->a[iShw].u = 0;
385 }
386#endif
387 }
388 break;
389 }
390
391 default:
392 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
393 }
394
395 /* next */
396 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
397 return;
398 pPage = &pPool->aPages[pPage->iMonitoredNext];
399 }
400}
401
402
403# ifndef IN_RING3
404/**
405 * Checks if a access could be a fork operation in progress.
406 *
407 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
408 *
409 * @returns true if it's likly that we're forking, otherwise false.
410 * @param pPool The pool.
411 * @param pCpu The disassembled instruction.
412 * @param offFault The access offset.
413 */
414DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
415{
416 /*
417 * i386 linux is using btr to clear X86_PTE_RW.
418 * The functions involved are (2.6.16 source inspection):
419 * clear_bit
420 * ptep_set_wrprotect
421 * copy_one_pte
422 * copy_pte_range
423 * copy_pmd_range
424 * copy_pud_range
425 * copy_page_range
426 * dup_mmap
427 * dup_mm
428 * copy_mm
429 * copy_process
430 * do_fork
431 */
432 if ( pCpu->pCurInstr->opcode == OP_BTR
433 && !(offFault & 4)
434 /** @todo Validate that the bit index is X86_PTE_RW. */
435 )
436 {
437 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
438 return true;
439 }
440 return false;
441}
442
443
444/**
445 * Determin whether the page is likely to have been reused.
446 *
447 * @returns true if we consider the page as being reused for a different purpose.
448 * @returns false if we consider it to still be a paging page.
449 * @param pPage The page in question.
450 * @param pCpu The disassembly info for the faulting insturction.
451 * @param pvFault The fault address.
452 *
453 * @remark The REP prefix check is left to the caller because of STOSD/W.
454 */
455DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
456{
457 switch (pCpu->pCurInstr->opcode)
458 {
459 case OP_PUSH:
460 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
461 return true;
462 case OP_PUSHF:
463 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
464 return true;
465 case OP_PUSHA:
466 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
467 return true;
468 case OP_FXSAVE:
469 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
470 return true;
471 }
472 if ( (pCpu->param1.flags & USE_REG_GEN32)
473 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
474 {
475 Log4(("pgmPoolMonitorIsReused: ESP\n"));
476 return true;
477 }
478
479 //if (pPage->fCR3Mix)
480 // return false;
481 return false;
482}
483
484
485/**
486 * Flushes the page being accessed.
487 *
488 * @returns VBox status code suitable for scheduling.
489 * @param pVM The VM handle.
490 * @param pPool The pool.
491 * @param pPage The pool page (head).
492 * @param pCpu The disassembly of the write instruction.
493 * @param pRegFrame The trap register frame.
494 * @param GCPhysFault The fault address as guest physical address.
495 * @param pvFault The fault address.
496 */
497static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
498 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
499{
500 /*
501 * First, do the flushing.
502 */
503 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
504
505 /*
506 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
507 */
508 uint32_t cbWritten;
509 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
510 if (VBOX_SUCCESS(rc2))
511 pRegFrame->eip += pCpu->opsize;
512 else if (rc2 == VERR_EM_INTERPRETER)
513 {
514#ifdef IN_GC
515 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
516 {
517 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04:%RGv, ignoring.\n",
518 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
519 rc = VINF_SUCCESS;
520 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
521 }
522 else
523#endif
524 {
525 rc = VINF_EM_RAW_EMULATE_INSTR;
526 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
527 }
528 }
529 else
530 rc = rc2;
531
532 /* See use in pgmPoolAccessHandlerSimple(). */
533 PGM_INVL_GUEST_TLBS();
534
535 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
536 return rc;
537
538}
539
540
541/**
542 * Handles the STOSD write accesses.
543 *
544 * @returns VBox status code suitable for scheduling.
545 * @param pVM The VM handle.
546 * @param pPool The pool.
547 * @param pPage The pool page (head).
548 * @param pCpu The disassembly of the write instruction.
549 * @param pRegFrame The trap register frame.
550 * @param GCPhysFault The fault address as guest physical address.
551 * @param pvFault The fault address.
552 */
553DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
554 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
555{
556 /*
557 * Increment the modification counter and insert it into the list
558 * of modified pages the first time.
559 */
560 if (!pPage->cModifications++)
561 pgmPoolMonitorModifiedInsert(pPool, pPage);
562
563 /*
564 * Execute REP STOSD.
565 *
566 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
567 * write situation, meaning that it's safe to write here.
568 */
569#ifdef IN_GC
570 uint32_t *pu32 = (uint32_t *)pvFault;
571#else
572 RTGCPTR pu32 = pvFault;
573#endif
574 while (pRegFrame->ecx)
575 {
576 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
577#ifdef IN_GC
578 *pu32++ = pRegFrame->eax;
579#else
580 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
581 pu32 += 4;
582#endif
583 GCPhysFault += 4;
584 pRegFrame->edi += 4;
585 pRegFrame->ecx--;
586 }
587 pRegFrame->eip += pCpu->opsize;
588
589 /* See use in pgmPoolAccessHandlerSimple(). */
590 PGM_INVL_GUEST_TLBS();
591
592 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
593 return VINF_SUCCESS;
594}
595
596
597/**
598 * Handles the simple write accesses.
599 *
600 * @returns VBox status code suitable for scheduling.
601 * @param pVM The VM handle.
602 * @param pPool The pool.
603 * @param pPage The pool page (head).
604 * @param pCpu The disassembly of the write instruction.
605 * @param pRegFrame The trap register frame.
606 * @param GCPhysFault The fault address as guest physical address.
607 * @param pvFault The fault address.
608 */
609DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
610 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
611{
612 /*
613 * Increment the modification counter and insert it into the list
614 * of modified pages the first time.
615 */
616 if (!pPage->cModifications++)
617 pgmPoolMonitorModifiedInsert(pPool, pPage);
618
619 /*
620 * Clear all the pages. ASSUMES that pvFault is readable.
621 */
622 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
623
624 /*
625 * Interpret the instruction.
626 */
627 uint32_t cb;
628 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
629 if (VBOX_SUCCESS(rc))
630 pRegFrame->eip += pCpu->opsize;
631 else if (rc == VERR_EM_INTERPRETER)
632 {
633# ifdef IN_GC
634 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
635 {
636 /* We're not able to handle this in ring-3, so fix the interpreter! */
637 /** @note Should be fine. There's no need to flush the whole thing. */
638#ifndef DEBUG_sandervl
639 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
640 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
641#endif
642 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
643 rc = pgmPoolMonitorChainFlush(pPool, pPage);
644 }
645 else
646# endif
647 {
648 rc = VINF_EM_RAW_EMULATE_INSTR;
649 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
650 }
651 }
652
653 /*
654 * Quick hack, with logging enabled we're getting stale
655 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
656 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
657 * have to be fixed to support this. But that'll have to wait till next week.
658 *
659 * An alternative is to keep track of the changed PTEs together with the
660 * GCPhys from the guest PT. This may proove expensive though.
661 *
662 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
663 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
664 */
665 PGM_INVL_GUEST_TLBS();
666
667 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
668 return rc;
669}
670
671
672/**
673 * \#PF Handler callback for PT write accesses.
674 *
675 * @returns VBox status code (appropriate for GC return).
676 * @param pVM VM Handle.
677 * @param uErrorCode CPU Error code.
678 * @param pRegFrame Trap register frame.
679 * NULL on DMA and other non CPU access.
680 * @param pvFault The fault address (cr2).
681 * @param GCPhysFault The GC physical address corresponding to pvFault.
682 * @param pvUser User argument.
683 */
684DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
685{
686 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
687 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
688 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
689 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
690
691 /*
692 * We should ALWAYS have the list head as user parameter. This
693 * is because we use that page to record the changes.
694 */
695 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
696
697 /*
698 * Disassemble the faulting instruction.
699 */
700 DISCPUSTATE Cpu;
701 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
702 AssertRCReturn(rc, rc);
703
704 /*
705 * Check if it's worth dealing with.
706 */
707 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
708 || pPage->fCR3Mix)
709 && !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
710 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
711 {
712 /*
713 * Simple instructions, no REP prefix.
714 */
715 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
716 {
717 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
718 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
719 return rc;
720 }
721
722 /*
723 * Windows is frequently doing small memset() operations (netio test 4k+).
724 * We have to deal with these or we'll kill the cache and performance.
725 */
726 if ( Cpu.pCurInstr->opcode == OP_STOSWD
727 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
728 && pRegFrame->ecx <= 0x20
729 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
730 && !((uintptr_t)pvFault & 3)
731 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
732 && Cpu.mode == CPUMODE_32BIT
733 && Cpu.opmode == CPUMODE_32BIT
734 && Cpu.addrmode == CPUMODE_32BIT
735 && Cpu.prefix == PREFIX_REP
736 && !pRegFrame->eflags.Bits.u1DF
737 )
738 {
739 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
740 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
741 return rc;
742 }
743
744 /* REP prefix, don't bother. */
745 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
746 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
747 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
748 }
749
750 /*
751 * Not worth it, so flush it.
752 */
753 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
754 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
755 return rc;
756}
757
758# endif /* !IN_RING3 */
759#endif /* PGMPOOL_WITH_MONITORING */
760
761
762
763#ifdef PGMPOOL_WITH_CACHE
764/**
765 * Inserts a page into the GCPhys hash table.
766 *
767 * @param pPool The pool.
768 * @param pPage The page.
769 */
770DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
771{
772 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
773 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
774 pPage->iNext = pPool->aiHash[iHash];
775 pPool->aiHash[iHash] = pPage->idx;
776}
777
778
779/**
780 * Removes a page from the GCPhys hash table.
781 *
782 * @param pPool The pool.
783 * @param pPage The page.
784 */
785DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
786{
787 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
788 if (pPool->aiHash[iHash] == pPage->idx)
789 pPool->aiHash[iHash] = pPage->iNext;
790 else
791 {
792 uint16_t iPrev = pPool->aiHash[iHash];
793 for (;;)
794 {
795 const int16_t i = pPool->aPages[iPrev].iNext;
796 if (i == pPage->idx)
797 {
798 pPool->aPages[iPrev].iNext = pPage->iNext;
799 break;
800 }
801 if (i == NIL_PGMPOOL_IDX)
802 {
803 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
804 break;
805 }
806 iPrev = i;
807 }
808 }
809 pPage->iNext = NIL_PGMPOOL_IDX;
810}
811
812
813/**
814 * Frees up one cache page.
815 *
816 * @returns VBox status code.
817 * @retval VINF_SUCCESS on success.
818 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
819 * @param pPool The pool.
820 * @param iUser The user index.
821 */
822static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
823{
824 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
825 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
826
827 /*
828 * Select one page from the tail of the age list.
829 */
830 uint16_t iToFree = pPool->iAgeTail;
831 if (iToFree == iUser)
832 iToFree = pPool->aPages[iToFree].iAgePrev;
833/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
834 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
835 {
836 uint16_t i = pPool->aPages[iToFree].iAgePrev;
837 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
838 {
839 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
840 continue;
841 iToFree = i;
842 break;
843 }
844 }
845*/
846 Assert(iToFree != iUser);
847 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
848
849 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
850 if (rc == VINF_SUCCESS)
851 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
852 return rc;
853}
854
855
856/**
857 * Checks if a kind mismatch is really a page being reused
858 * or if it's just normal remappings.
859 *
860 * @returns true if reused and the cached page (enmKind1) should be flushed
861 * @returns false if not reused.
862 * @param enmKind1 The kind of the cached page.
863 * @param enmKind2 The kind of the requested page.
864 */
865static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
866{
867 switch (enmKind1)
868 {
869 /*
870 * Never reuse them. There is no remapping in non-paging mode.
871 */
872 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
873 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
874 return true;
875
876 /*
877 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
878 */
879 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
880 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
881 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
882 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
883 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
884 switch (enmKind2)
885 {
886 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
887 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
888 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
889 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
890 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
891 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
892 return true;
893 default:
894 return false;
895 }
896
897 /*
898 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
899 */
900 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
901 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
902 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
903 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
904 switch (enmKind2)
905 {
906 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
907 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
908 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
909 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
910 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
911 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
912 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
913 return true;
914 default:
915 return false;
916 }
917
918 /*
919 * These cannot be flushed, and it's common to reuse the PDs as PTs.
920 */
921 case PGMPOOLKIND_ROOT_32BIT_PD:
922 case PGMPOOLKIND_ROOT_PAE_PD:
923 case PGMPOOLKIND_ROOT_PDPTR:
924 case PGMPOOLKIND_ROOT_PML4:
925 return false;
926
927 default:
928 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
929 }
930}
931
932
933/**
934 * Attempts to satisfy a pgmPoolAlloc request from the cache.
935 *
936 * @returns VBox status code.
937 * @retval VINF_PGM_CACHED_PAGE on success.
938 * @retval VERR_FILE_NOT_FOUND if not found.
939 * @param pPool The pool.
940 * @param GCPhys The GC physical address of the page we're gonna shadow.
941 * @param enmKind The kind of mapping.
942 * @param iUser The shadow page pool index of the user table.
943 * @param iUserTable The index into the user table (shadowed).
944 * @param ppPage Where to store the pointer to the page.
945 */
946static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
947{
948 /*
949 * Look up the GCPhys in the hash.
950 */
951 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
952 if (i != NIL_PGMPOOL_IDX)
953 {
954 do
955 {
956 PPGMPOOLPAGE pPage = &pPool->aPages[i];
957 if (pPage->GCPhys == GCPhys)
958 {
959 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
960 {
961 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
962 if (VBOX_SUCCESS(rc))
963 {
964 *ppPage = pPage;
965 STAM_COUNTER_INC(&pPool->StatCacheHits);
966 return VINF_PGM_CACHED_PAGE;
967 }
968 return rc;
969 }
970
971 /*
972 * The kind is different. In some cases we should now flush the page
973 * as it has been reused, but in most cases this is normal remapping
974 * of PDs as PT or big pages using the GCPhys field in a slightly
975 * different way than the other kinds.
976 */
977 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
978 {
979 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
980 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
981 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
982 break;
983 }
984 }
985
986 /* next */
987 i = pPage->iNext;
988 } while (i != NIL_PGMPOOL_IDX);
989 }
990
991 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
992 STAM_COUNTER_INC(&pPool->StatCacheMisses);
993 return VERR_FILE_NOT_FOUND;
994}
995
996
997/**
998 * Inserts a page into the cache.
999 *
1000 * @param pPool The pool.
1001 * @param pPage The cached page.
1002 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1003 */
1004static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1005{
1006 /*
1007 * Insert into the GCPhys hash if the page is fit for that.
1008 */
1009 Assert(!pPage->fCached);
1010 if (fCanBeCached)
1011 {
1012 pPage->fCached = true;
1013 pgmPoolHashInsert(pPool, pPage);
1014 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1015 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1016 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1017 }
1018 else
1019 {
1020 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1021 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1022 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1023 }
1024
1025 /*
1026 * Insert at the head of the age list.
1027 */
1028 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1029 pPage->iAgeNext = pPool->iAgeHead;
1030 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1031 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1032 else
1033 pPool->iAgeTail = pPage->idx;
1034 pPool->iAgeHead = pPage->idx;
1035}
1036
1037
1038/**
1039 * Flushes a cached page.
1040 *
1041 * @param pPool The pool.
1042 * @param pPage The cached page.
1043 */
1044static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1045{
1046 /*
1047 * Remove the page from the hash.
1048 */
1049 if (pPage->fCached)
1050 {
1051 pPage->fCached = false;
1052 pgmPoolHashRemove(pPool, pPage);
1053 }
1054 else
1055 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1056
1057 /*
1058 * Remove it from the age list.
1059 */
1060 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1061 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1062 else
1063 pPool->iAgeTail = pPage->iAgePrev;
1064 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1065 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1066 else
1067 pPool->iAgeHead = pPage->iAgeNext;
1068 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1069 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1070}
1071#endif /* PGMPOOL_WITH_CACHE */
1072
1073
1074#ifdef PGMPOOL_WITH_MONITORING
1075/**
1076 * Looks for pages sharing the monitor.
1077 *
1078 * @returns Pointer to the head page.
1079 * @returns NULL if not found.
1080 * @param pPool The Pool
1081 * @param pNewPage The page which is going to be monitored.
1082 */
1083static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1084{
1085#ifdef PGMPOOL_WITH_CACHE
1086 /*
1087 * Look up the GCPhys in the hash.
1088 */
1089 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1090 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1091 if (i == NIL_PGMPOOL_IDX)
1092 return NULL;
1093 do
1094 {
1095 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1096 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1097 && pPage != pNewPage)
1098 {
1099 switch (pPage->enmKind)
1100 {
1101 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1102 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1103 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1104 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1105 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1106 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1107 case PGMPOOLKIND_ROOT_32BIT_PD:
1108 case PGMPOOLKIND_ROOT_PAE_PD:
1109 case PGMPOOLKIND_ROOT_PDPTR:
1110 case PGMPOOLKIND_ROOT_PML4:
1111 {
1112 /* find the head */
1113 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1114 {
1115 Assert(pPage->iMonitoredPrev != pPage->idx);
1116 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1117 }
1118 return pPage;
1119 }
1120
1121 /* ignore, no monitoring. */
1122 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1123 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1124 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1125 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1126 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1127 break;
1128 default:
1129 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1130 }
1131 }
1132
1133 /* next */
1134 i = pPage->iNext;
1135 } while (i != NIL_PGMPOOL_IDX);
1136#endif
1137 return NULL;
1138}
1139
/**
 * Enables write monitoring of a guest page.
 *
 * Kinds that have nothing to monitor return right away.  For the kinds that
 * are monitored the page is either chained in behind the head of an existing
 * monitoring chain for the same guest page (see
 * pgmPoolMonitorGetPageByGCPhys), or, when there is no such chain, a physical
 * write handler covering the guest page is registered with the page as user
 * argument.  In both cases the page ends up flagged as monitored.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        /* Monitored kinds: carry on below. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        /*
         * The 32-bit and PAE roots are accepted only when
         * PGMPOOL_WITH_MIXED_PT_CR3 is defined; otherwise they fall through
         * to the fatal assertion together with the remaining kinds.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
        case PGMPOOLKIND_ROOT_PDPTR:
        case PGMPOOLKIND_ROOT_PML4:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
        /* A chain already exists for this guest page: link the page in right after its head. */
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        /* No chain yet: register a write handler for the whole (page aligned) guest page. */
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTXSUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        /* A pending pool clear request in the sync flags is reported to the caller. */
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}
1219
1220
/**
 * Disables write monitoring of a guest page.
 *
 * Kinds that have nothing to monitor return right away.  Otherwise the page
 * is taken out of its monitoring chain; when it was the head of the chain the
 * handler callbacks are re-pointed at the next page, and when it was the only
 * page the physical handler is deregistered.  Finally the page is removed
 * from the list of modified pages.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        /* Monitored kinds: carry on below. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        /*
         * The 32-bit and PAE roots are accepted only when
         * PGMPOOL_WITH_MIXED_PT_CR3 is defined; otherwise they fall through
         * to the fatal assertion together with the remaining kinds.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
        case PGMPOOLKIND_ROOT_PDPTR:
        case PGMPOOLKIND_ROOT_PML4:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    const PVM pVM = pPool->CTXSUFF(pVM);
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            /*
             * The page is the head of the chain (and has a successor, given the
             * outer condition): promote the successor, hand it the fCR3Mix flag
             * and make it the user argument of the handler callbacks.
             */
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            /* The page is in the middle or at the end of the chain: plain unlink. */
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        /* The page is not chained to any other page: drop the handler for the guest page. */
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        /* A pending pool clear request in the sync flags is reported to the caller. */
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}
1313
1314
1315#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1316/**
1317 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1318 *
1319 * @param pPool The Pool.
1320 * @param pPage A page in the chain.
1321 * @param fCR3Mix The new fCR3Mix value.
1322 */
1323static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1324{
1325 /* current */
1326 pPage->fCR3Mix = fCR3Mix;
1327
1328 /* before */
1329 int16_t idx = pPage->iMonitoredPrev;
1330 while (idx != NIL_PGMPOOL_IDX)
1331 {
1332 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1333 idx = pPool->aPages[idx].iMonitoredPrev;
1334 }
1335
1336 /* after */
1337 idx = pPage->iMonitoredNext;
1338 while (idx != NIL_PGMPOOL_IDX)
1339 {
1340 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1341 idx = pPool->aPages[idx].iMonitoredNext;
1342 }
1343}
1344
1345
1346/**
1347 * Installs or modifies monitoring of a CR3 page (special).
1348 *
1349 * We're pretending the CR3 page is shadowed by the pool so we can use the
1350 * generic mechanisms in detecting chained monitoring. (This also gives us a
1351 * tast of what code changes are required to really pool CR3 shadow pages.)
1352 *
1353 * @returns VBox status code.
1354 * @param pPool The pool.
1355 * @param idxRoot The CR3 (root) page index.
1356 * @param GCPhysCR3 The (new) CR3 value.
1357 */
1358int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1359{
1360 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1361 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1362 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1363 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1364
1365 /*
1366 * The unlikely case where it already matches.
1367 */
1368 if (pPage->GCPhys == GCPhysCR3)
1369 {
1370 Assert(pPage->fMonitored);
1371 return VINF_SUCCESS;
1372 }
1373
1374 /*
1375 * Flush the current monitoring and remove it from the hash.
1376 */
1377 int rc = VINF_SUCCESS;
1378 if (pPage->fMonitored)
1379 {
1380 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1381 rc = pgmPoolMonitorFlush(pPool, pPage);
1382 if (rc == VERR_PGM_POOL_CLEARED)
1383 rc = VINF_SUCCESS;
1384 else
1385 AssertFatalRC(rc);
1386 pgmPoolHashRemove(pPool, pPage);
1387 }
1388
1389 /*
1390 * Monitor the page at the new location and insert it into the hash.
1391 */
1392 pPage->GCPhys = GCPhysCR3;
1393 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1394 if (rc2 != VERR_PGM_POOL_CLEARED)
1395 {
1396 AssertFatalRC(rc2);
1397 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1398 rc = rc2;
1399 }
1400 pgmPoolHashInsert(pPool, pPage);
1401 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1402 return rc;
1403}
1404
1405
1406/**
1407 * Removes the monitoring of a CR3 page (special).
1408 *
1409 * @returns VBox status code.
1410 * @param pPool The pool.
1411 * @param idxRoot The CR3 (root) page index.
1412 */
1413int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1414{
1415 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1416 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1417 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1418 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1419
1420 if (!pPage->fMonitored)
1421 return VINF_SUCCESS;
1422
1423 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1424 int rc = pgmPoolMonitorFlush(pPool, pPage);
1425 if (rc != VERR_PGM_POOL_CLEARED)
1426 AssertFatalRC(rc);
1427 else
1428 rc = VINF_SUCCESS;
1429 pgmPoolHashRemove(pPool, pPage);
1430 Assert(!pPage->fMonitored);
1431 pPage->GCPhys = NIL_RTGCPHYS;
1432 return rc;
1433}
1434#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1435
1436
1437/**
1438 * Inserts the page into the list of modified pages.
1439 *
1440 * @param pPool The pool.
1441 * @param pPage The page.
1442 */
1443void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1444{
1445 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1446 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1447 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1448 && pPool->iModifiedHead != pPage->idx,
1449 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1450 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1451 pPool->iModifiedHead, pPool->cModifiedPages));
1452
1453 pPage->iModifiedNext = pPool->iModifiedHead;
1454 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1455 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1456 pPool->iModifiedHead = pPage->idx;
1457 pPool->cModifiedPages++;
1458#ifdef VBOX_WITH_STATISTICS
1459 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1460 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1461#endif
1462}
1463
1464
1465/**
1466 * Removes the page from the list of modified pages and resets the
1467 * moficiation counter.
1468 *
1469 * @param pPool The pool.
1470 * @param pPage The page which is believed to be in the list of modified pages.
1471 */
1472static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1473{
1474 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1475 if (pPool->iModifiedHead == pPage->idx)
1476 {
1477 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1478 pPool->iModifiedHead = pPage->iModifiedNext;
1479 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1480 {
1481 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1482 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1483 }
1484 pPool->cModifiedPages--;
1485 }
1486 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1487 {
1488 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1489 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1490 {
1491 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1492 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1493 }
1494 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1495 pPool->cModifiedPages--;
1496 }
1497 else
1498 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1499 pPage->cModifications = 0;
1500}
1501
1502
1503/**
1504 * Zaps the list of modified pages, resetting their modification counters in the process.
1505 *
1506 * @param pVM The VM handle.
1507 */
1508void pgmPoolMonitorModifiedClearAll(PVM pVM)
1509{
1510 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1511 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1512
1513 unsigned cPages = 0; NOREF(cPages);
1514 uint16_t idx = pPool->iModifiedHead;
1515 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1516 while (idx != NIL_PGMPOOL_IDX)
1517 {
1518 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1519 idx = pPage->iModifiedNext;
1520 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1521 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1522 pPage->cModifications = 0;
1523 Assert(++cPages);
1524 }
1525 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1526 pPool->cModifiedPages = 0;
1527}
1528
1529
/**
 * Clear all shadow pages and clear all modification counters.
 *
 * The regular pool pages are visited from the highest index downwards; shadow
 * page tables are zeroed and every page in use gets its modified-list links
 * and modification counter reset.  The special pages below PGMPOOL_IDX_FIRST
 * get the same reset.  With PGMPOOL_WITH_GCPHYS_TRACKING the per-page
 * reference bits of all RAM ranges are masked off and the phys ext free list
 * is rebuilt from scratch.
 *
 * @param   pVM     The VM handle.
 * @remark  Should only be used when monitoring is available, thus placed in
 *          the PGMPOOL_WITH_MONITORING #ifdef.
 */
void pgmPoolClearAll(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    STAM_PROFILE_START(&pPool->StatClearAll, c);
    LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));

    /*
     * Iterate all the pages until we've encountered all that in use.
     * This is simple but not quite optimal solution.
     */
    unsigned cModifiedPages = 0; NOREF(cModifiedPages);
    unsigned cLeft = pPool->cUsedPages;     /* pages in use that have not been visited yet */
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        /* A page with a NIL GCPhys is skipped and not counted against cLeft. */
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            switch (pPage->enmKind)
            {
                /*
                 * We only care about shadow page tables.
                 */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                {
                    /* With user tracking only tables that have present entries are zeroed; without it, all are. */
#ifdef PGMPOOL_WITH_USER_TRACKING
                    if (pPage->cPresent)
#endif
                    {
                        void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
                        STAM_PROFILE_START(&pPool->StatZeroPage, z);
                        ASMMemZeroPage(pvShw);
                        STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
#ifdef PGMPOOL_WITH_USER_TRACKING
                        pPage->cPresent = 0;
                        pPage->iFirstPresent = ~0;
#endif
                    }
                }
                /* fall thru */

                default:
                    /* The increment inside the Assert only happens where assertions are compiled in. */
                    Assert(!pPage->cModifications || ++cModifiedPages);
                    Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
                    Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
                    pPage->iModifiedNext = NIL_PGMPOOL_IDX;
                    pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
                    pPage->cModifications = 0;
                    break;

            }
            /* Stop as soon as every page in use has been visited. */
            if (!--cLeft)
                break;
        }
    }

    /* swipe the special pages too. */
    for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            Assert(!pPage->cModifications || ++cModifiedPages);
            Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
            Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
            pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->cModifications = 0;
        }
    }

    /* All links were reset above, so the modified list is now empty. */
    AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;
    pPool->cModifiedPages = 0;

#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
    /*
     * Clear all the GCPhys links and rebuild the phys ext free list.
     */
    for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
         pRam;
         pRam = pRam->CTXSUFF(pNext))
    {
        /* NOTE(review): this iPage shadows the function level iPage declared above. */
        unsigned iPage = pRam->cb >> PAGE_SHIFT;
        while (iPage-- > 0)
            pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
    }

    /* Chain all phys ext entries into one free list starting at entry 0. */
    pPool->iPhysExtFreeHead = 0;
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
    const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
    for (unsigned i = 0; i < cMaxPhysExts; i++)
    {
        paPhysExts[i].iNext = i + 1;
        paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
    }
    /* NOTE(review): assumes cMaxPhysExts is at least 1 - confirm against the pool setup code. */
    paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
#endif


    pPool->cPresent = 0;
    STAM_PROFILE_STOP(&pPool->StatClearAll, c);
}
1649#endif /* PGMPOOL_WITH_MONITORING */
1650
1651
1652#ifdef PGMPOOL_WITH_USER_TRACKING
1653/**
1654 * Frees up at least one user entry.
1655 *
1656 * @returns VBox status code.
1657 * @retval VINF_SUCCESS if successfully added.
1658 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1659 * @param pPool The pool.
1660 * @param iUser The user index.
1661 */
1662static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1663{
1664 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1665#ifdef PGMPOOL_WITH_CACHE
1666 /*
1667 * Just free cached pages in a braindead fashion.
1668 */
1669 /** @todo walk the age list backwards and free the first with usage. */
1670 int rc = VINF_SUCCESS;
1671 do
1672 {
1673 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1674 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1675 rc = rc2;
1676 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1677 return rc;
1678#else
1679 /*
1680 * Lazy approach.
1681 */
1682 pgmPoolFlushAllInt(pPool);
1683 return VERR_PGM_POOL_FLUSHED;
1684#endif
1685}
1686
1687
1688/**
1689 * Inserts a page into the cache.
1690 *
1691 * This will create user node for the page, insert it into the GCPhys
1692 * hash, and insert it into the age list.
1693 *
1694 * @returns VBox status code.
1695 * @retval VINF_SUCCESS if successfully added.
1696 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1697 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1698 * @param pPool The pool.
1699 * @param pPage The cached page.
1700 * @param GCPhys The GC physical address of the page we're gonna shadow.
1701 * @param iUser The user index.
1702 * @param iUserTable The user table index.
1703 */
1704DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1705{
1706 int rc = VINF_SUCCESS;
1707 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1708
1709 /*
1710 * Find free a user node.
1711 */
1712 uint16_t i = pPool->iUserFreeHead;
1713 if (i == NIL_PGMPOOL_USER_INDEX)
1714 {
1715 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1716 if (VBOX_FAILURE(rc))
1717 return rc;
1718 i = pPool->iUserFreeHead;
1719 }
1720
1721 /*
1722 * Unlink the user node from the free list,
1723 * initialize and insert it into the user list.
1724 */
1725 pPool->iUserFreeHead = pUser[i].iNext;
1726 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1727 pUser[i].iUser = iUser;
1728 pUser[i].iUserTable = iUserTable;
1729 pPage->iUserHead = i;
1730
1731 /*
1732 * Insert into cache and enable monitoring of the guest page if enabled.
1733 *
1734 * Until we implement caching of all levels, including the CR3 one, we'll
1735 * have to make sure we don't try monitor & cache any recursive reuse of
1736 * a monitored CR3 page. Because all windows versions are doing this we'll
1737 * have to be able to do combined access monitoring, CR3 + PT and
1738 * PD + PT (guest PAE).
1739 *
1740 * Update:
1741 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1742 */
1743#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1744# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1745 const bool fCanBeMonitored = true;
1746# else
1747 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1748 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1749 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1750# endif
1751# ifdef PGMPOOL_WITH_CACHE
1752 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1753# endif
1754 if (fCanBeMonitored)
1755 {
1756# ifdef PGMPOOL_WITH_MONITORING
1757 rc = pgmPoolMonitorInsert(pPool, pPage);
1758 if (rc == VERR_PGM_POOL_CLEARED)
1759 {
1760 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1761# ifndef PGMPOOL_WITH_CACHE
1762 pgmPoolMonitorFlush(pPool, pPage);
1763 rc = VERR_PGM_POOL_FLUSHED;
1764# endif
1765 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1766 pUser[i].iNext = pPool->iUserFreeHead;
1767 pUser[i].iUser = NIL_PGMPOOL_IDX;
1768 pPool->iUserFreeHead = i;
1769 }
1770 }
1771# endif
1772#endif /* PGMPOOL_WITH_MONITORING */
1773 return rc;
1774}
1775
1776
1777# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1778/**
1779 * Adds a user reference to a page.
1780 *
1781 * This will
1782 * This will move the page to the head of the
1783 *
1784 * @returns VBox status code.
1785 * @retval VINF_SUCCESS if successfully added.
1786 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1787 * @param pPool The pool.
1788 * @param pPage The cached page.
1789 * @param iUser The user index.
1790 * @param iUserTable The user table.
1791 */
1792static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1793{
1794 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1795
1796# ifdef VBOX_STRICT
1797 /*
1798 * Check that the entry doesn't already exists.
1799 */
1800 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1801 {
1802 uint16_t i = pPage->iUserHead;
1803 do
1804 {
1805 Assert(i < pPool->cMaxUsers);
1806 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1807 i = paUsers[i].iNext;
1808 } while (i != NIL_PGMPOOL_USER_INDEX);
1809 }
1810# endif
1811
1812 /*
1813 * Allocate a user node.
1814 */
1815 uint16_t i = pPool->iUserFreeHead;
1816 if (i == NIL_PGMPOOL_USER_INDEX)
1817 {
1818 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1819 if (VBOX_FAILURE(rc))
1820 return rc;
1821 i = pPool->iUserFreeHead;
1822 }
1823 pPool->iUserFreeHead = paUsers[i].iNext;
1824
1825 /*
1826 * Initialize the user node and insert it.
1827 */
1828 paUsers[i].iNext = pPage->iUserHead;
1829 paUsers[i].iUser = iUser;
1830 paUsers[i].iUserTable = iUserTable;
1831 pPage->iUserHead = i;
1832
1833# ifdef PGMPOOL_WITH_CACHE
1834 /*
1835 * Tell the cache to update its replacement stats for this page.
1836 */
1837 pgmPoolCacheUsed(pPool, pPage);
1838# endif
1839 return VINF_SUCCESS;
1840}
1841# endif /* PGMPOOL_WITH_CACHE */
1842
1843
1844/**
1845 * Frees a user record associated with a page.
1846 *
1847 * This does not clear the entry in the user table, it simply replaces the
1848 * user record to the chain of free records.
1849 *
1850 * @param pPool The pool.
1851 * @param HCPhys The HC physical address of the shadow page.
1852 * @param iUser The shadow page pool index of the user table.
1853 * @param iUserTable The index into the user table (shadowed).
1854 */
1855static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1856{
1857 /*
1858 * Unlink and free the specified user entry.
1859 */
1860 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1861
1862 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1863 uint16_t i = pPage->iUserHead;
1864 if ( i != NIL_PGMPOOL_USER_INDEX
1865 && paUsers[i].iUser == iUser
1866 && paUsers[i].iUserTable == iUserTable)
1867 {
1868 pPage->iUserHead = paUsers[i].iNext;
1869
1870 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1871 paUsers[i].iNext = pPool->iUserFreeHead;
1872 pPool->iUserFreeHead = i;
1873 return;
1874 }
1875
1876 /* General: Linear search. */
1877 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1878 while (i != NIL_PGMPOOL_USER_INDEX)
1879 {
1880 if ( paUsers[i].iUser == iUser
1881 && paUsers[i].iUserTable == iUserTable)
1882 {
1883 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1884 paUsers[iPrev].iNext = paUsers[i].iNext;
1885 else
1886 pPage->iUserHead = paUsers[i].iNext;
1887
1888 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1889 paUsers[i].iNext = pPool->iUserFreeHead;
1890 pPool->iUserFreeHead = i;
1891 return;
1892 }
1893 iPrev = i;
1894 i = paUsers[i].iNext;
1895 }
1896
1897 /* Fatal: didn't find it */
1898 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1899 iUser, iUserTable, pPage->GCPhys));
1900}
1901
1902
1903/**
1904 * Gets the entry size of a shadow table.
1905 *
1906 * @param enmKind
1907 * The kind of page.
1908 *
1909 * @returns The size of the entry in bytes. That is, 4 or 8.
1910 * @returns If the kind is not for a table, an assertion is raised and 0 is
1911 * returned.
1912 */
1913DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1914{
1915 switch (enmKind)
1916 {
1917 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1918 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1919 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1920 case PGMPOOLKIND_ROOT_32BIT_PD:
1921 return 4;
1922
1923 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1924 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1926 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1927 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1928 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1929 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1930 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1931 case PGMPOOLKIND_ROOT_PAE_PD:
1932 case PGMPOOLKIND_ROOT_PDPTR:
1933 case PGMPOOLKIND_ROOT_PML4:
1934 return 8;
1935
1936 default:
1937 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1938 }
1939}
1940
1941
1942/**
1943 * Gets the entry size of a guest table.
1944 *
1945 * @param enmKind
1946 * The kind of page.
1947 *
1948 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1949 * @returns If the kind is not for a table, an assertion is raised and 0 is
1950 * returned.
1951 */
1952DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1953{
1954 switch (enmKind)
1955 {
1956 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1958 case PGMPOOLKIND_ROOT_32BIT_PD:
1959 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1960 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1961 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1962 return 4;
1963
1964 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1965 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1966 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1967 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1968 case PGMPOOLKIND_ROOT_PAE_PD:
1969 case PGMPOOLKIND_ROOT_PDPTR:
1970 case PGMPOOLKIND_ROOT_PML4:
1971 return 8;
1972
1973 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1974 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1975 /** @todo can we return 0? (nobody is calling this...) */
1976 return 0;
1977
1978 default:
1979 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1980 }
1981}
1982
1983
1984#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1985/**
1986 * Scans one shadow page table for mappings of a physical page.
1987 *
1988 * @param pVM The VM handle.
1989 * @param pHCPhys The aHCPhys ramrange entry in question.
1990 * @param iShw The shadow page table.
1991 * @param cRefs The number of references made in that PT.
1992 */
1993static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
1994{
1995 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
1996 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1997
1998 /*
1999 * Assert sanity.
2000 */
2001 Assert(cRefs == 1);
2002 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2003 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2004
2005 /*
2006 * Then, clear the actual mappings to the page in the shadow PT.
2007 */
2008 switch (pPage->enmKind)
2009 {
2010 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2011 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2012 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2013 {
2014 const uint32_t u32 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2015 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2016 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2017 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2018 {
2019 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2020 pPT->a[i].u = 0;
2021 cRefs--;
2022 if (!cRefs)
2023 return;
2024 }
2025#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2026 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2027 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2028 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2029 {
2030 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2031 pPT->a[i].u = 0;
2032 }
2033#endif
2034 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2035 break;
2036 }
2037
2038 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2039 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2040 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2041 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2042 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2043 {
2044 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2045 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2046 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2047 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2048 {
2049 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2050 pPT->a[i].u = 0;
2051 cRefs--;
2052 if (!cRefs)
2053 return;
2054 }
2055#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2056 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2057 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2058 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2059 {
2060 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2061 pPT->a[i].u = 0;
2062 }
2063#endif
2064 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2065 break;
2066 }
2067
2068 default:
2069 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2070 }
2071}
2072
2073
2074/**
2075 * Scans one shadow page table for mappings of a physical page.
2076 *
2077 * @param pVM The VM handle.
2078 * @param pHCPhys The aHCPhys ramrange entry in question.
2079 * @param iShw The shadow page table.
2080 * @param cRefs The number of references made in that PT.
2081 */
2082void pgmPoolTrackFlushGCPhysPT(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
2083{
2084 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2085 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
2086 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2087 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, iShw, cRefs);
2088 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2089 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2090}
2091
2092
2093/**
2094 * Flushes a list of shadow page tables mapping the same physical page.
2095 *
2096 * @param pVM The VM handle.
2097 * @param pHCPhys The aHCPhys ramrange entry in question.
2098 * @param iPhysExt The physical cross reference extent list to flush.
2099 */
2100void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iPhysExt)
2101{
2102 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2103 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2104 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pHCPhys=%p:{%RHp} iPhysExt\n", pHCPhys, *pHCPhys, iPhysExt));
2105
2106 const uint16_t iPhysExtStart = iPhysExt;
2107 PPGMPOOLPHYSEXT pPhysExt;
2108 do
2109 {
2110 Assert(iPhysExt < pPool->cMaxPhysExts);
2111 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2112 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2113 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2114 {
2115 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, pPhysExt->aidx[i], 1);
2116 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2117 }
2118
2119 /* next */
2120 iPhysExt = pPhysExt->iNext;
2121 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2122
2123 /* insert the list into the free list and clear the ram range entry. */
2124 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2125 pPool->iPhysExtFreeHead = iPhysExtStart;
2126 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2127
2128 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2129}
2130#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2131
2132
2133/**
2134 * Scans all shadow page tables for mappings of a physical page.
2135 *
2136 * This may be slow, but it's most likely more efficient than cleaning
2137 * out the entire page pool / cache.
2138 *
2139 * @returns VBox status code.
2140 * @retval VINF_SUCCESS if all references has been successfully cleared.
2141 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2142 * a page pool cleaning.
2143 *
2144 * @param pVM The VM handle.
2145 * @param pHCPhys The aHCPhys ramrange entry in question.
2146 */
2147int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PRTHCPHYS pHCPhys)
2148{
2149 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2150 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2151 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d *pHCPhys=%RHp\n",
2152 pPool->cUsedPages, pPool->cPresent, *pHCPhys));
2153
2154#if 1
2155 /*
2156 * There is a limit to what makes sense.
2157 */
2158 if (pPool->cPresent > 1024)
2159 {
2160 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2161 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2162 return VINF_PGM_GCPHYS_ALIASED;
2163 }
2164#endif
2165
2166 /*
2167 * Iterate all the pages until we've encountered all that in use.
2168 * This is simple but not quite optimal solution.
2169 */
2170 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2171 const uint32_t u32 = u64;
2172 unsigned cLeft = pPool->cUsedPages;
2173 unsigned iPage = pPool->cCurPages;
2174 while (--iPage >= PGMPOOL_IDX_FIRST)
2175 {
2176 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2177 if (pPage->GCPhys != NIL_RTGCPHYS)
2178 {
2179 switch (pPage->enmKind)
2180 {
2181 /*
2182 * We only care about shadow page tables.
2183 */
2184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2185 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2186 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2187 {
2188 unsigned cPresent = pPage->cPresent;
2189 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2190 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2191 if (pPT->a[i].n.u1Present)
2192 {
2193 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2194 {
2195 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2196 pPT->a[i].u = 0;
2197 }
2198 if (!--cPresent)
2199 break;
2200 }
2201 break;
2202 }
2203
2204 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2206 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2207 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2208 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2209 {
2210 unsigned cPresent = pPage->cPresent;
2211 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2212 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2213 if (pPT->a[i].n.u1Present)
2214 {
2215 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2216 {
2217 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2218 pPT->a[i].u = 0;
2219 }
2220 if (!--cPresent)
2221 break;
2222 }
2223 break;
2224 }
2225 }
2226 if (!--cLeft)
2227 break;
2228 }
2229 }
2230
2231 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2232 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2233 return VINF_SUCCESS;
2234}
2235
2236
2237/**
2238 * Clears the user entry in a user table.
2239 *
2240 * This is used to remove all references to a page when flushing it.
2241 */
2242static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2243{
2244 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2245 Assert(pUser->iUser < pPool->cCurPages);
2246
2247 /*
2248 * Map the user page.
2249 */
2250 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2251 union
2252 {
2253 uint64_t *pau64;
2254 uint32_t *pau32;
2255 } u;
2256 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2257
2258#ifdef VBOX_STRICT
2259 /*
2260 * Some sanity checks.
2261 */
2262 switch (pUserPage->enmKind)
2263 {
2264 case PGMPOOLKIND_ROOT_32BIT_PD:
2265 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2266 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2267 break;
2268 case PGMPOOLKIND_ROOT_PAE_PD:
2269 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2270 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2271 break;
2272 case PGMPOOLKIND_ROOT_PDPTR:
2273 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2274 Assert(pUser->iUserTable < 4);
2275 break;
2276 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2277 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2278 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2279 break;
2280 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2281 case PGMPOOLKIND_ROOT_PML4:
2282 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2283 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2284 break;
2285 default:
2286 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2287 break;
2288 }
2289#endif /* VBOX_STRICT */
2290
2291 /*
2292 * Clear the entry in the user page.
2293 */
2294 switch (pUserPage->enmKind)
2295 {
2296 /* 32-bit entries */
2297 case PGMPOOLKIND_ROOT_32BIT_PD:
2298 u.pau32[pUser->iUserTable] = 0;
2299 break;
2300
2301 /* 64-bit entries */
2302 case PGMPOOLKIND_ROOT_PAE_PD:
2303 case PGMPOOLKIND_ROOT_PDPTR:
2304 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2305 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2306 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2307 case PGMPOOLKIND_ROOT_PML4:
2308 u.pau64[pUser->iUserTable] = 0;
2309 break;
2310
2311 default:
2312 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2313 }
2314}
2315
2316
2317/**
2318 * Clears all users of a page.
2319 */
2320static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2321{
2322 /*
2323 * Free all the user records.
2324 */
2325 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2326 uint16_t i = pPage->iUserHead;
2327 while (i != NIL_PGMPOOL_USER_INDEX)
2328 {
2329 /* Clear enter in user table. */
2330 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2331
2332 /* Free it. */
2333 const uint16_t iNext = paUsers[i].iNext;
2334 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2335 paUsers[i].iNext = pPool->iUserFreeHead;
2336 pPool->iUserFreeHead = i;
2337
2338 /* Next. */
2339 i = iNext;
2340 }
2341 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2342}
2343
2344
2345#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2346/**
2347 * Allocates a new physical cross reference extent.
2348 *
2349 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2350 * @param pVM The VM handle.
2351 * @param piPhysExt Where to store the phys ext index.
2352 */
2353PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2354{
2355 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2356 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2357 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2358 {
2359 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2360 return NULL;
2361 }
2362 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2363 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2364 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2365 *piPhysExt = iPhysExt;
2366 return pPhysExt;
2367}
2368
2369
2370/**
2371 * Frees a physical cross reference extent.
2372 *
2373 * @param pVM The VM handle.
2374 * @param iPhysExt The extent to free.
2375 */
2376void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2377{
2378 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2379 Assert(iPhysExt < pPool->cMaxPhysExts);
2380 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2381 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2382 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2383 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2384 pPool->iPhysExtFreeHead = iPhysExt;
2385}
2386
2387
2388/**
2389 * Frees a physical cross reference extent.
2390 *
2391 * @param pVM The VM handle.
2392 * @param iPhysExt The extent to free.
2393 */
2394void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2395{
2396 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2397
2398 const uint16_t iPhysExtStart = iPhysExt;
2399 PPGMPOOLPHYSEXT pPhysExt;
2400 do
2401 {
2402 Assert(iPhysExt < pPool->cMaxPhysExts);
2403 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2404 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2405 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2406
2407 /* next */
2408 iPhysExt = pPhysExt->iNext;
2409 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2410
2411 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2412 pPool->iPhysExtFreeHead = iPhysExtStart;
2413}
2414
2415/**
2416 * Insert a reference into a list of physical cross reference extents.
2417 *
2418 * @returns The new ram range flags (top 16-bits).
2419 *
2420 * @param pVM The VM handle.
2421 * @param iPhysExt The physical extent index of the list head.
2422 * @param iShwPT The shadow page table index.
2423 *
2424 */
2425static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2426{
2427 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2428 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2429
2430 /* special common case. */
2431 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2432 {
2433 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2434 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2435 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2436 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2437 }
2438
2439 /* general treatment. */
2440 const uint16_t iPhysExtStart = iPhysExt;
2441 unsigned cMax = 15;
2442 for (;;)
2443 {
2444 Assert(iPhysExt < pPool->cMaxPhysExts);
2445 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2446 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2447 {
2448 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2449 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2450 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2451 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2452 }
2453 if (!--cMax)
2454 {
2455 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2456 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2457 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2458 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2459 }
2460 }
2461
2462 /* add another extent to the list. */
2463 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2464 if (!pNew)
2465 {
2466 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2467 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2468 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2469 }
2470 pNew->iNext = iPhysExtStart;
2471 pNew->aidx[0] = iShwPT;
2472 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2473 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2474}
2475
2476
2477/**
2478 * Add a reference to guest physical page where extents are in use.
2479 *
2480 * @returns The new ram range flags (top 16-bits).
2481 *
2482 * @param pVM The VM handle.
2483 * @param u16 The ram range flags (top 16-bits).
2484 * @param iShwPT The shadow page table index.
2485 */
2486uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2487{
2488 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2489 {
2490 /*
2491 * Convert to extent list.
2492 */
2493 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2494 uint16_t iPhysExt;
2495 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2496 if (pPhysExt)
2497 {
2498 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2499 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2500 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2501 pPhysExt->aidx[1] = iShwPT;
2502 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2503 }
2504 else
2505 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2506 }
2507 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2508 {
2509 /*
2510 * Insert into the extent list.
2511 */
2512 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2513 }
2514 else
2515 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2516 return u16;
2517}
2518
2519
2520/**
2521 * Clear references to guest physical memory.
2522 *
2523 * @param pPool The pool.
2524 * @param pPage The page.
2525 * @param pHCPhys Pointer to the aHCPhys entry in the ram range.
2526 */
2527void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PRTHCPHYS pHCPhys)
2528{
2529 const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
2530 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2531
2532 uint16_t iPhysExt = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2533 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2534 {
2535 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2536 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2537 do
2538 {
2539 Assert(iPhysExt < pPool->cMaxPhysExts);
2540
2541 /*
2542 * Look for the shadow page and check if it's all freed.
2543 */
2544 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2545 {
2546 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2547 {
2548 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2549
2550 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2551 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2552 {
2553 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2554 return;
2555 }
2556
2557 /* we can free the node. */
2558 PVM pVM = pPool->CTXSUFF(pVM);
2559 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2560 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2561 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2562 {
2563 /* lonely node */
2564 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2565 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d lonely\n", *pHCPhys, pPage->idx));
2566 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2567 }
2568 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2569 {
2570 /* head */
2571 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d head\n", *pHCPhys, pPage->idx));
2572 *pHCPhys = (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK)
2573 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2574 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2575 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2576 }
2577 else
2578 {
2579 /* in list */
2580 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2581 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2582 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2583 }
2584 iPhysExt = iPhysExtNext;
2585 return;
2586 }
2587 }
2588
2589 /* next */
2590 iPhysExtPrev = iPhysExt;
2591 iPhysExt = paPhysExts[iPhysExt].iNext;
2592 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2593
2594 AssertFatalMsgFailed(("not-found! cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2595 }
2596 else /* nothing to do */
2597 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64\n", *pHCPhys));
2598}
2599
2600
2601
2602/**
2603 * Clear references to guest physical memory.
2604 *
2605 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2606 * is assumed to be correct, so the linear search can be skipped and we can assert
2607 * at an earlier point.
2608 *
2609 * @param pPool The pool.
2610 * @param pPage The page.
2611 * @param HCPhys The host physical address corresponding to the guest page.
2612 * @param GCPhys The guest physical address corresponding to HCPhys.
2613 */
2614static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2615{
2616 /*
2617 * Walk range list.
2618 */
2619 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2620 while (pRam)
2621 {
2622 RTGCPHYS off = GCPhys - pRam->GCPhys;
2623 if (off < pRam->cb)
2624 {
2625 /* does it match? */
2626 const unsigned iPage = off >> PAGE_SHIFT;
2627 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2628 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2629 {
2630 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2631 return;
2632 }
2633 break;
2634 }
2635 pRam = CTXSUFF(pRam->pNext);
2636 }
2637 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2638}
2639
2640
2641/**
2642 * Clear references to guest physical memory.
2643 *
2644 * @param pPool The pool.
2645 * @param pPage The page.
2646 * @param HCPhys The host physical address corresponding to the guest page.
2647 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
2648 */
2649static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2650{
2651 /*
2652 * Walk range list.
2653 */
2654 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2655 while (pRam)
2656 {
2657 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2658 if (off < pRam->cb)
2659 {
2660 /* does it match? */
2661 const unsigned iPage = off >> PAGE_SHIFT;
2662 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2663 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2664 {
2665 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2666 return;
2667 }
2668 break;
2669 }
2670 pRam = CTXSUFF(pRam->pNext);
2671 }
2672
2673 /*
2674 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2675 */
2676 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2677 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2678 while (pRam)
2679 {
2680 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2681 while (iPage-- > 0)
2682 {
2683 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2684 {
2685 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2686 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2687 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2688 return;
2689 }
2690 }
2691 pRam = CTXSUFF(pRam->pNext);
2692 }
2693
2694 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2695}
2696
2697
2698/**
2699 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2700 *
2701 * @param pPool The pool.
2702 * @param pPage The page.
2703 * @param pShwPT The shadow page table (mapping of the page).
2704 * @param pGstPT The guest page table.
2705 */
2706DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2707{
2708 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2709 if (pShwPT->a[i].n.u1Present)
2710 {
2711 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2712 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2713 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2714 if (!--pPage->cPresent)
2715 break;
2716 }
2717}
2718
2719
2720/**
2721 * Clear references to guest physical memory in a PAE / 32-bit page table.
2722 *
2723 * @param pPool The pool.
2724 * @param pPage The page.
2725 * @param pShwPT The shadow page table (mapping of the page).
2726 * @param pGstPT The guest page table (just a half one).
2727 */
2728DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2729{
2730 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2731 if (pShwPT->a[i].n.u1Present)
2732 {
2733 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2734 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2735 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2736 }
2737}
2738
2739
2740/**
2741 * Clear references to guest physical memory in a PAE / PAE page table.
2742 *
2743 * @param pPool The pool.
2744 * @param pPage The page.
2745 * @param pShwPT The shadow page table (mapping of the page).
2746 * @param pGstPT The guest page table.
2747 */
2748DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2749{
2750 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2751 if (pShwPT->a[i].n.u1Present)
2752 {
2753 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
2754 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2755 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2756 }
2757}
2758
2759
2760/**
2761 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2762 *
2763 * @param pPool The pool.
2764 * @param pPage The page.
2765 * @param pShwPT The shadow page table (mapping of the page).
2766 */
2767DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2768{
2769 RTGCPHYS GCPhys = pPage->GCPhys;
2770 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2771 if (pShwPT->a[i].n.u1Present)
2772 {
2773 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2774 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2775 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2776 }
2777}
2778
2779
2780/**
2781 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2782 *
2783 * @param pPool The pool.
2784 * @param pPage The page.
2785 * @param pShwPT The shadow page table (mapping of the page).
2786 */
2787DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2788{
2789 RTGCPHYS GCPhys = pPage->GCPhys;
2790 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2791 if (pShwPT->a[i].n.u1Present)
2792 {
2793 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2794 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2795 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2796 }
2797}
2798#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2799
2800
2801/**
2802 * Clear references to shadowed pages in a PAE page directory.
2803 *
2804 * @param pPool The pool.
2805 * @param pPage The page.
2806 * @param pShwPD The shadow page directory (mapping of the page).
2807 */
2808DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2809{
2810 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2811 {
2812 if (pShwPD->a[i].n.u1Present)
2813 {
2814 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2815 if (pSubPage)
2816 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2817 else
2818 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2819 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2820 }
2821 }
2822}
2823
2824
2825/**
2826 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2827 *
2828 * @param pPool The pool.
2829 * @param pPage The page.
2830 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2831 */
2832DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2833{
2834 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2835 {
2836 if (pShwPdPtr->a[i].n.u1Present)
2837 {
2838 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2839 if (pSubPage)
2840 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2841 else
2842 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2843 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2844 }
2845 }
2846}
2847
2848
2849/**
2850 * Clears all references made by this page.
2851 *
2852 * This includes other shadow pages and GC physical addresses.
2853 *
2854 * @param pPool The pool.
2855 * @param pPage The page.
2856 */
2857static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2858{
2859 /*
2860 * Map the shadow page and take action according to the page kind.
2861 */
2862 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2863 switch (pPage->enmKind)
2864 {
2865#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2867 {
2868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2869 void *pvGst;
2870 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2871 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2872 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2873 break;
2874 }
2875
2876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2877 {
2878 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2879 void *pvGst;
2880 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2881 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2882 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2883 break;
2884 }
2885
2886 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2887 {
2888 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2889 void *pvGst;
2890 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2891 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2892 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2893 break;
2894 }
2895
2896 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2897 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2898 {
2899 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2900 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2901 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2902 break;
2903 }
2904
2905 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2906 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2907 {
2908 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2909 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2910 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2911 break;
2912 }
2913
2914#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2915 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2916 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2917 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2919 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2921 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2922 break;
2923#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2924
2925 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2926 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2927 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2928 break;
2929
2930 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2931 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2932 break;
2933
2934 default:
2935 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2936 }
2937
2938 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
2939 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2940 ASMMemZeroPage(pvShw);
2941 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2942 pPage->fZeroed = true;
2943}
2944#endif /* PGMPOOL_WITH_USER_TRACKING */
2945
2946
2947/**
2948 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2949 *
2950 * @param pPool The pool.
2951 */
2952static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2953{
2954 /*
2955 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2956 */
2957 Assert(NIL_PGMPOOL_IDX == 0);
2958 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2959 {
2960 /*
2961 * Get the page address.
2962 */
2963 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2964 union
2965 {
2966 uint64_t *pau64;
2967 uint32_t *pau32;
2968 } u;
2969 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2970
2971 /*
2972 * Mark stuff not present.
2973 */
2974 switch (pPage->enmKind)
2975 {
2976 case PGMPOOLKIND_ROOT_32BIT_PD:
2977 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2978 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2979 u.pau32[iPage] = 0;
2980 break;
2981
2982 case PGMPOOLKIND_ROOT_PAE_PD:
2983 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2984 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2985 u.pau64[iPage] = 0;
2986 break;
2987
2988 case PGMPOOLKIND_ROOT_PML4:
2989 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2990 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2991 u.pau64[iPage] = 0;
2992 break;
2993
2994 case PGMPOOLKIND_ROOT_PDPTR:
2995 /* Not root of shadowed pages currently, ignore it. */
2996 break;
2997 }
2998 }
2999
3000 /*
3001 * Paranoia (to be removed), flag a global CR3 sync.
3002 */
3003 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3004}
3005
3006
3007/**
3008 * Flushes the entire cache.
3009 *
3010 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3011 * and execute this CR3 flush.
3012 *
3013 * @param pPool The pool.
3014 */
3015static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3016{
3017 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3018 LogFlow(("pgmPoolFlushAllInt:\n"));
3019
3020 /*
3021 * If there are no pages in the pool, there is nothing to do.
3022 */
3023 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3024 {
3025 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3026 return;
3027 }
3028
3029 /*
3030 * Nuke the free list and reinsert all pages into it.
3031 */
3032 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3033 {
3034 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3035
3036#ifdef IN_RING3
3037 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3038#endif
3039#ifdef PGMPOOL_WITH_MONITORING
3040 if (pPage->fMonitored)
3041 pgmPoolMonitorFlush(pPool, pPage);
3042 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3043 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3044 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3045 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3046 pPage->cModifications = 0;
3047#endif
3048 pPage->GCPhys = NIL_RTGCPHYS;
3049 pPage->enmKind = PGMPOOLKIND_FREE;
3050 Assert(pPage->idx == i);
3051 pPage->iNext = i + 1;
3052 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3053 pPage->fSeenNonGlobal = false;
3054 pPage->fMonitored= false;
3055 pPage->fCached = false;
3056 pPage->fReusedFlushPending = false;
3057 pPage->fCR3Mix = false;
3058#ifdef PGMPOOL_WITH_USER_TRACKING
3059 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3060#endif
3061#ifdef PGMPOOL_WITH_CACHE
3062 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3063 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3064#endif
3065 }
3066 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3067 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3068 pPool->cUsedPages = 0;
3069
3070#ifdef PGMPOOL_WITH_USER_TRACKING
3071 /*
3072 * Zap and reinitialize the user records.
3073 */
3074 pPool->cPresent = 0;
3075 pPool->iUserFreeHead = 0;
3076 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3077 const unsigned cMaxUsers = pPool->cMaxUsers;
3078 for (unsigned i = 0; i < cMaxUsers; i++)
3079 {
3080 paUsers[i].iNext = i + 1;
3081 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3082 paUsers[i].iUserTable = 0xfffe;
3083 }
3084 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3085#endif
3086
3087#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3088 /*
3089 * Clear all the GCPhys links and rebuild the phys ext free list.
3090 */
3091 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3092 pRam;
3093 pRam = pRam->CTXSUFF(pNext))
3094 {
3095 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3096 while (iPage-- > 0)
3097 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
3098 }
3099
3100 pPool->iPhysExtFreeHead = 0;
3101 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3102 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3103 for (unsigned i = 0; i < cMaxPhysExts; i++)
3104 {
3105 paPhysExts[i].iNext = i + 1;
3106 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3107 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3108 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3109 }
3110 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3111#endif
3112
3113#ifdef PGMPOOL_WITH_MONITORING
3114 /*
3115 * Just zap the modified list.
3116 */
3117 pPool->cModifiedPages = 0;
3118 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3119#endif
3120
3121#ifdef PGMPOOL_WITH_CACHE
3122 /*
3123 * Clear the GCPhys hash and the age list.
3124 */
3125 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3126 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3127 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3128 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3129#endif
3130
3131 /*
3132 * Flush all the special root pages.
3133 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3134 */
3135 pgmPoolFlushAllSpecialRoots(pPool);
3136 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3137 {
3138 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3139 pPage->iNext = NIL_PGMPOOL_IDX;
3140#ifdef PGMPOOL_WITH_MONITORING
3141 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3142 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3143 pPage->cModifications = 0;
3144 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3145 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3146 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3147 if (pPage->fMonitored)
3148 {
3149 PVM pVM = pPool->CTXSUFF(pVM);
3150 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3151 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3152 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3153 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3154 pPool->pszAccessHandler);
3155 AssertFatalRCSuccess(rc);
3156# ifdef PGMPOOL_WITH_CACHE
3157 pgmPoolHashInsert(pPool, pPage);
3158# endif
3159 }
3160#endif
3161#ifdef PGMPOOL_WITH_USER_TRACKING
3162 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3163#endif
3164#ifdef PGMPOOL_WITH_CACHE
3165 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3166 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3167#endif
3168 }
3169
3170 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3171}
3172
3173
3174/**
3175 * Flushes a pool page.
3176 *
3177 * This moves the page to the free list after removing all user references to it.
3178 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3179 *
3180 * @returns VBox status code.
3181 * @retval VINF_SUCCESS on success.
3182 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3183 * @param pPool The pool.
3184 * @param HCPhys The HC physical address of the shadow page.
3185 */
3186int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3187{
3188 int rc = VINF_SUCCESS;
3189 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3190 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3191 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3192
3193 /*
3194 * Quietly reject any attempts at flushing any of the special root pages.
3195 */
3196 if (pPage->idx < PGMPOOL_IDX_FIRST)
3197 {
3198 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3199 return VINF_SUCCESS;
3200 }
3201
3202 /*
3203 * Mark the page as being in need of a ASMMemZeroPage().
3204 */
3205 pPage->fZeroed = false;
3206
3207#ifdef PGMPOOL_WITH_USER_TRACKING
3208 /*
3209 * Clear the page.
3210 */
3211 pgmPoolTrackClearPageUsers(pPool, pPage);
3212 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3213 pgmPoolTrackDeref(pPool, pPage);
3214 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3215#endif
3216
3217#ifdef PGMPOOL_WITH_CACHE
3218 /*
3219 * Flush it from the cache.
3220 */
3221 pgmPoolCacheFlushPage(pPool, pPage);
3222#endif /* PGMPOOL_WITH_CACHE */
3223
3224#ifdef PGMPOOL_WITH_MONITORING
3225 /*
3226 * Deregistering the monitoring.
3227 */
3228 if (pPage->fMonitored)
3229 rc = pgmPoolMonitorFlush(pPool, pPage);
3230#endif
3231
3232 /*
3233 * Free the page.
3234 */
3235 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3236 pPage->iNext = pPool->iFreeHead;
3237 pPool->iFreeHead = pPage->idx;
3238 pPage->enmKind = PGMPOOLKIND_FREE;
3239 pPage->GCPhys = NIL_RTGCPHYS;
3240 pPage->fReusedFlushPending = false;
3241
3242 pPool->cUsedPages--;
3243 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3244 return rc;
3245}
3246
3247
3248/**
3249 * Frees a usage of a pool page.
3250 *
3251 * The caller is responsible to updating the user table so that it no longer
3252 * references the shadow page.
3253 *
3254 * @param pPool The pool.
3255 * @param HCPhys The HC physical address of the shadow page.
3256 * @param iUser The shadow page pool index of the user table.
3257 * @param iUserTable The index into the user table (shadowed).
3258 */
3259void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3260{
3261 STAM_PROFILE_START(&pPool->StatFree, a);
3262 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3263 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3264 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3265#ifdef PGMPOOL_WITH_USER_TRACKING
3266 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3267#endif
3268#ifdef PGMPOOL_WITH_CACHE
3269 if (!pPage->fCached)
3270#endif
3271 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3272 STAM_PROFILE_STOP(&pPool->StatFree, a);
3273}
3274
3275
3276/**
3277 * Makes one or more free page free.
3278 *
3279 * @returns VBox status code.
3280 * @retval VINF_SUCCESS on success.
3281 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3282 *
3283 * @param pPool The pool.
3284 * @param iUser The user of the page.
3285 */
3286static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3287{
3288 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3289
3290 /*
3291 * If the pool isn't full grown yet, expand it.
3292 */
3293 if (pPool->cCurPages < pPool->cMaxPages)
3294 {
3295 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3296#ifdef IN_RING3
3297 int rc = PGMR3PoolGrow(pPool->pVMHC);
3298#else
3299 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3300#endif
3301 if (VBOX_FAILURE(rc))
3302 return rc;
3303 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3304 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3305 return VINF_SUCCESS;
3306 }
3307
3308#ifdef PGMPOOL_WITH_CACHE
3309 /*
3310 * Free one cached page.
3311 */
3312 return pgmPoolCacheFreeOne(pPool, iUser);
3313#else
3314 /*
3315 * Flush the pool.
3316 * If we have tracking enabled, it should be possible to come up with
3317 * a cheap replacement strategy...
3318 */
3319 pgmPoolFlushAllInt(pPool);
3320 return VERR_PGM_POOL_FLUSHED;
3321#endif
3322}
3323
3324
3325/**
3326 * Allocates a page from the pool.
3327 *
3328 * This page may actually be a cached page and not in need of any processing
3329 * on the callers part.
3330 *
3331 * @returns VBox status code.
3332 * @retval VINF_SUCCESS if a NEW page was allocated.
3333 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3334 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3335 * @param pVM The VM handle.
3336 * @param GCPhys The GC physical address of the page we're gonna shadow.
3337 * For 4MB and 2MB PD entries, it's the first address the
3338 * shadow PT is covering.
3339 * @param enmKind The kind of mapping.
3340 * @param iUser The shadow page pool index of the user table.
3341 * @param iUserTable The index into the user table (shadowed).
3342 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3343 */
3344int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3345{
3346 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3347 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3348 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3349
3350 *ppPage = NULL;
3351
3352#ifdef PGMPOOL_WITH_CACHE
3353 if (pPool->fCacheEnabled)
3354 {
3355 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3356 if (VBOX_SUCCESS(rc2))
3357 {
3358 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3359 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3360 return rc2;
3361 }
3362 }
3363#endif
3364
3365 /*
3366 * Allocate a new one.
3367 */
3368 int rc = VINF_SUCCESS;
3369 uint16_t iNew = pPool->iFreeHead;
3370 if (iNew == NIL_PGMPOOL_IDX)
3371 {
3372 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3373 if (VBOX_FAILURE(rc))
3374 {
3375 if (rc != VERR_PGM_POOL_CLEARED)
3376 {
3377 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3378 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3379 return rc;
3380 }
3381 rc = VERR_PGM_POOL_FLUSHED;
3382 }
3383 iNew = pPool->iFreeHead;
3384 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3385 }
3386
3387 /* unlink the free head */
3388 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3389 pPool->iFreeHead = pPage->iNext;
3390 pPage->iNext = NIL_PGMPOOL_IDX;
3391
3392 /*
3393 * Initialize it.
3394 */
3395 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3396 pPage->enmKind = enmKind;
3397 pPage->GCPhys = GCPhys;
3398 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3399 pPage->fMonitored = false;
3400 pPage->fCached = false;
3401 pPage->fReusedFlushPending = false;
3402 pPage->fCR3Mix = false;
3403#ifdef PGMPOOL_WITH_MONITORING
3404 pPage->cModifications = 0;
3405 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3406 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3407#endif
3408#ifdef PGMPOOL_WITH_USER_TRACKING
3409 pPage->cPresent = 0;
3410 pPage->iFirstPresent = ~0;
3411
3412 /*
3413 * Insert into the tracking and cache. If this fails, free the page.
3414 */
3415 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3416 if (VBOX_FAILURE(rc3))
3417 {
3418 if (rc3 != VERR_PGM_POOL_CLEARED)
3419 {
3420 pPool->cUsedPages--;
3421 pPage->enmKind = PGMPOOLKIND_FREE;
3422 pPage->GCPhys = NIL_RTGCPHYS;
3423 pPage->iNext = pPool->iFreeHead;
3424 pPool->iFreeHead = pPage->idx;
3425 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3426 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3427 return rc3;
3428 }
3429 rc = VERR_PGM_POOL_FLUSHED;
3430 }
3431#endif /* PGMPOOL_WITH_USER_TRACKING */
3432
3433 /*
3434 * Commit the allocation, clear the page and return.
3435 */
3436#ifdef VBOX_WITH_STATISTICS
3437 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3438 pPool->cUsedPagesHigh = pPool->cUsedPages;
3439#endif
3440
3441 if (!pPage->fZeroed)
3442 {
3443 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3444 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3445 ASMMemZeroPage(pv);
3446 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3447 }
3448
3449 *ppPage = pPage;
3450 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3451 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3452 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3453 return rc;
3454}
3455
3456
3457/**
3458 * Frees a usage of a pool page.
3459 *
3460 * @param pVM The VM handle.
3461 * @param HCPhys The HC physical address of the shadow page.
3462 * @param iUser The shadow page pool index of the user table.
3463 * @param iUserTable The index into the user table (shadowed).
3464 */
3465void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3466{
3467 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3468 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3469 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3470}
3471
3472
3473/**
3474 * Gets a in-use page in the pool by it's physical address.
3475 *
3476 * @returns Pointer to the page.
3477 * @param pVM The VM handle.
3478 * @param HCPhys The HC physical address of the shadow page.
3479 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3480 */
3481PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3482{
3483 /** @todo profile this! */
3484 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3485 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3486 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3487 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3488 return pPage;
3489}
3490
3491
3492/**
3493 * Flushes the entire cache.
3494 *
3495 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3496 * and execute this CR3 flush.
3497 *
3498 * @param pPool The pool.
3499 */
3500void pgmPoolFlushAll(PVM pVM)
3501{
3502 LogFlow(("pgmPoolFlushAll:\n"));
3503 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3504}
3505
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette