VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 2152

Last change on this file since 2152 was 2152, checked in by vboxsync, 18 years ago

Never reuse non-paging shadow page tables.

  • Property svn:keywords set to Id
File size: 118.9 KB
Line 
1/* $Id: PGMAllPool.cpp 2152 2007-04-18 09:22:33Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
#ifdef IN_GC
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages - mapped on demand via the dynamic mapping area. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
        void *pv;
        int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages - these have fixed GC mappings kept in the PGM data. */
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PDPTR:
            return pVM->pgm.s.pGCPaePDPTR;
        case PGMPOOL_IDX_PML4:
            return pVM->pgm.s.pGCPaePML4;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
}
#endif /* IN_GC */
125
126
127#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determines the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that. The size is taken from the first
     * (destination) operand of the disassembled instruction.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}
142
143
144/**
145 * Flushes a chain of pages sharing the same access monitor.
146 *
147 * @returns VBox status code suitable for scheduling.
148 * @param pPool The pool.
149 * @param pPage A page in the chain.
150 */
151int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
152{
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Itereate the list flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
179 rc = VINF_PGM_SYNC_CR3;
180 }
181 /* next */
182 if (idx == NIL_PGMPOOL_IDX)
183 break;
184 pPage = &pPool->aPages[idx];
185 }
186 return rc;
187}
188
189
/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
#ifdef IN_GC
    /* In GC the guest address is directly usable; just align it down to the entry. */
    return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(IN_RING0)
    /* In ring-0 translate the (entry-aligned) guest physical address to a host pointer. */
    void *pvRet;
    int rc = PGMRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    /* In ring-3 pvFault is already a host pointer; align it down to the entry. */
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}
220
221
222/**
223 * Process shadow entries before they are changed by the guest.
224 *
225 * For PT entries we will clear them. For PD entries, we'll simply check
226 * for mapping conflicts and set the SyncCR3 FF if found.
227 *
228 * @param pPool The pool.
229 * @param pPage The head page.
230 * @param GCPhysFault The guest physical fault address.
231 * @param uAddress In R0 and GC this is the guest context fault address (flat).
232 * In R3 this is the host context 'fault' address.
233 * @param pCpu The disassembler state for figuring out the write size.
234 * This need not be specified if the caller knows we won't do cross entry accesses.
235 */
236#ifdef IN_RING3
237void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
238#else
239void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
240#endif
241{
242 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
243 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
244 for (;;)
245 {
246 union
247 {
248 void *pv;
249 PX86PT pPT;
250 PX86PTPAE pPTPae;
251 PX86PD pPD;
252 PX86PDPAE pPDPae;
253 } uShw;
254 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
255
256 switch (pPage->enmKind)
257 {
258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
259 {
260 const unsigned iShw = off / sizeof(X86PTE);
261 if (uShw.pPT->a[iShw].n.u1Present)
262 {
263# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
264 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
265 pgmPoolTracDerefGCPhysHint(pPool, pPage,
266 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
267 pGstPte->u & X86_PTE_PG_MASK);
268# endif
269 uShw.pPT->a[iShw].u = 0;
270 }
271 break;
272 }
273
274 /* page/2 sized */
275 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 if (uShw.pPTPae->a[iShw].n.u1Present)
280 {
281# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
282 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 pGstPte->u & X86_PTE_PG_MASK);
286# endif
287 uShw.pPTPae->a[iShw].u = 0;
288 }
289 }
290 break;
291
292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
293 {
294 const unsigned iShw = off / sizeof(X86PTPAE);
295 if (uShw.pPTPae->a[iShw].n.u1Present)
296 {
297# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
298 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
299 pgmPoolTracDerefGCPhysHint(pPool, pPage,
300 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
301 pGstPte->u & X86_PTE_PAE_PG_MASK);
302# endif
303 uShw.pPTPae->a[iShw].u = 0;
304 }
305 break;
306 }
307
308 case PGMPOOLKIND_ROOT_32BIT_PD:
309 {
310 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
311 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
312 {
313 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
314 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
316 }
317 /* paranoia / a bit assumptive. */
318 else if ( pCpu
319 && (off & 4)
320 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
321 {
322 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
323 if ( iShw2 != iShw
324 && iShw2 < ELEMENTS(uShw.pPD->a)
325 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
326 {
327 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
328 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
329 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
330 }
331 }
332#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
333 if ( uShw.pPD->a[iShw].n.u1Present
334 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
335 {
336 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
337# ifdef IN_GC /* TLB load - we're pushing things a bit... */
338 ASMProbeReadByte(pvAddress);
339# endif
340 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
341 uShw.pPD->a[iShw].u = 0;
342 }
343#endif
344 break;
345 }
346
347 case PGMPOOLKIND_ROOT_PAE_PD:
348 {
349 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
350 for (unsigned i = 0; i < 2; i++, iShw++)
351 {
352 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
353 {
354 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
355 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
356 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
357 }
358 /* paranoia / a bit assumptive. */
359 else if ( pCpu
360 && (off & 4)
361 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
362 {
363 const unsigned iShw2 = iShw + 2;
364 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
365 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
366 {
367 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
368 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
369 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
370 }
371 }
372#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
373 if ( uShw.pPDPae->a[iShw].n.u1Present
374 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
375 {
376 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
377# ifdef IN_GC /* TLB load - we're pushing things a bit... */
378 ASMProbeReadByte(pvAddress);
379# endif
380 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
381 uShw.pPDPae->a[iShw].u = 0;
382 }
383#endif
384 }
385 break;
386 }
387
388 default:
389 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
390 }
391
392 /* next */
393 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
394 return;
395 pPage = &pPool->aPages[pPage->iMonitoredNext];
396 }
397}
398
399
400# ifndef IN_RING3
401/**
402 * Checks if a access could be a fork operation in progress.
403 *
404 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
405 *
406 * @returns true if it's likly that we're forking, otherwise false.
407 * @param pPool The pool.
408 * @param pCpu The disassembled instruction.
409 * @param offFault The access offset.
410 */
411DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
412{
413 /*
414 * i386 linux is using btr to clear X86_PTE_RW.
415 * The functions involved are (2.6.16 source inspection):
416 * clear_bit
417 * ptep_set_wrprotect
418 * copy_one_pte
419 * copy_pte_range
420 * copy_pmd_range
421 * copy_pud_range
422 * copy_page_range
423 * dup_mmap
424 * dup_mm
425 * copy_mm
426 * copy_process
427 * do_fork
428 */
429 if ( pCpu->pCurInstr->opcode == OP_BTR
430 && !(offFault & 4)
431 /** @todo Validate that the bit index is X86_PTE_RW. */
432 )
433 {
434 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
435 return true;
436 }
437 return false;
438}
439
440
441/**
442 * Determin whether the page is likely to have been reused.
443 *
444 * @returns true if we consider the page as being reused for a different purpose.
445 * @returns false if we consider it to still be a paging page.
446 * @param pPage The page in question.
447 * @param pCpu The disassembly info for the faulting insturction.
448 * @param pvFault The fault address.
449 *
450 * @remark The REP prefix check is left to the caller because of STOSD/W.
451 */
452DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
453{
454 switch (pCpu->pCurInstr->opcode)
455 {
456 case OP_PUSH:
457 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
458 return true;
459 case OP_PUSHF:
460 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
461 return true;
462 case OP_PUSHA:
463 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
464 return true;
465 case OP_FXSAVE:
466 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
467 return true;
468 }
469 if ( (pCpu->param1.flags & USE_REG_GEN32)
470 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
471 {
472 Log4(("pgmPoolMonitorIsReused: ESP\n"));
473 return true;
474 }
475
476 //if (pPage->fCR3Mix)
477 // return false;
478 return false;
479}
480
481
/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->eip += pCpu->opsize;   /* advance past the interpreted instruction */
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        /* Interpretation failures inside patch code are tolerated and ignored. */
        if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
        }
        else
#endif
        {
            /* Let the full emulation path handle what the interpreter couldn't. */
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;

}
536
537
/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
     * write situation, meaning that it's safe to write here.
     */
#ifdef IN_GC
    uint32_t *pu32 = (uint32_t *)pvFault;   /* in GC the guest address is directly writable */
#else
    RTGCPTR pu32 = pvFault;                 /* otherwise writes go through PGMPhysWriteGCPhys */
#endif
    while (pRegFrame->ecx)
    {
        /* Give the monitor a chance to react (clear shadow PTEs etc.) before each write. */
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
#ifdef IN_GC
        *pu32++ = pRegFrame->eax;
#else
        PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
        pu32 += 4;   /* NOTE(review): assumes RTGCPTR arithmetic is byte-granular here - confirm */
#endif
        GCPhysFault += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->eip += pCpu->opsize;   /* skip the whole REP STOSD we just emulated */

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}
592
593
/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->eip += pCpu->opsize;   /* advance past the interpreted instruction */
    else if (rc == VERR_EM_INTERPRETER)
    {
# ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
        {
            /* We're not able to handle this in ring-3, so fix the interpreter! */
            /** @note Should be fine. There's no need to flush the whole thing. */
#ifndef DEBUG_sandervl
            AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
                             pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
#endif
            STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
            rc = pgmPoolMonitorChainFlush(pPool, pPage);
        }
        else
# endif
        {
            /* Let the full emulation path handle what the interpreter couldn't. */
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
        }
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}
667
668
/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with. Heavily modified pages (unless mixed
     * with CR3) and reuse/fork patterns are cheaper to handle with a flush.
     */
    if (    (   pPage->cModifications < 48   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
    return rc;
}
754
755# endif /* !IN_RING3 */
756#endif /* PGMPOOL_WITH_MONITORING */
757
758
759
760#ifdef PGMPOOL_WITH_CACHE
761/**
762 * Inserts a page into the GCPhys hash table.
763 *
764 * @param pPool The pool.
765 * @param pPage The page.
766 */
767DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
768{
769 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
770 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
771 pPage->iNext = pPool->aiHash[iHash];
772 pPool->aiHash[iHash] = pPage->idx;
773}
774
775
776/**
777 * Removes a page from the GCPhys hash table.
778 *
779 * @param pPool The pool.
780 * @param pPage The page.
781 */
782DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
783{
784 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
785 if (pPool->aiHash[iHash] == pPage->idx)
786 pPool->aiHash[iHash] = pPage->iNext;
787 else
788 {
789 uint16_t iPrev = pPool->aiHash[iHash];
790 for (;;)
791 {
792 const int16_t i = pPool->aPages[iPrev].iNext;
793 if (i == pPage->idx)
794 {
795 pPool->aPages[iPrev].iNext = pPage->iNext;
796 break;
797 }
798 if (i == NIL_PGMPOOL_IDX)
799 {
800 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
801 break;
802 }
803 iPrev = i;
804 }
805 }
806 pPage->iNext = NIL_PGMPOOL_IDX;
807}
808
809
/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   iUser       The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list (least recently used),
     * taking care never to evict the page the requesting user itself lives in.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
   NOTE(review): the inner test below reads aPages[iToFree].iUserHead where
   aPages[i].iUserHead looks intended - confirm before ever enabling this block.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}
851
852
/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for non-PAE stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPTR:
        case PGMPOOLKIND_ROOT_PML4:
            return false;

        /* NOTE(review): AssertFatalMsgFailed is assumed not to return; otherwise
           control falls off the end without a return value for unknown kinds. */
        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}
928
929
/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
{
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    /* Exact match - just register the additional user and hand it out. */
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}
992
993
994/**
995 * Inserts a page into the cache.
996 *
997 * @param pPool The pool.
998 * @param pPage The cached page.
999 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1000 */
1001static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1002{
1003 /*
1004 * Insert into the GCPhys hash if the page is fit for that.
1005 */
1006 Assert(!pPage->fCached);
1007 if (fCanBeCached)
1008 {
1009 pPage->fCached = true;
1010 pgmPoolHashInsert(pPool, pPage);
1011 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1012 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1013 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1014 }
1015 else
1016 {
1017 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1018 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1019 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1020 }
1021
1022 /*
1023 * Insert at the head of the age list.
1024 */
1025 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1026 pPage->iAgeNext = pPool->iAgeHead;
1027 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1028 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1029 else
1030 pPool->iAgeTail = pPage->idx;
1031 pPool->iAgeHead = pPage->idx;
1032}
1033
1034
1035/**
1036 * Flushes a cached page.
1037 *
1038 * @param pPool The pool.
1039 * @param pPage The cached page.
1040 */
1041static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1042{
1043 /*
1044 * Remove the page from the hash.
1045 */
1046 if (pPage->fCached)
1047 {
1048 pPage->fCached = false;
1049 pgmPoolHashRemove(pPool, pPage);
1050 }
1051 else
1052 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1053
1054 /*
1055 * Remove it from the age list.
1056 */
1057 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1058 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1059 else
1060 pPool->iAgeTail = pPage->iAgePrev;
1061 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1062 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1063 else
1064 pPool->iAgeHead = pPage->iAgeNext;
1065 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1066 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1067}
1068#endif /* PGMPOOL_WITH_CACHE */
1069
1070
1071#ifdef PGMPOOL_WITH_MONITORING
1072/**
1073 * Looks for pages sharing the monitor.
1074 *
1075 * @returns Pointer to the head page.
1076 * @returns NULL if not found.
1077 * @param pPool The Pool
1078 * @param pNewPage The page which is going to be monitored.
1079 */
1080static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1081{
1082#ifdef PGMPOOL_WITH_CACHE
1083 /*
1084 * Look up the GCPhys in the hash.
1085 */
1086 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1087 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1088 if (i == NIL_PGMPOOL_IDX)
1089 return NULL;
1090 do
1091 {
1092 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1093 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1094 && pPage != pNewPage)
1095 {
1096 switch (pPage->enmKind)
1097 {
1098 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1099 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1100 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1101 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1102 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1103 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1104 case PGMPOOLKIND_ROOT_32BIT_PD:
1105 case PGMPOOLKIND_ROOT_PAE_PD:
1106 case PGMPOOLKIND_ROOT_PDPTR:
1107 case PGMPOOLKIND_ROOT_PML4:
1108 {
1109 /* find the head */
1110 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1111 {
1112 Assert(pPage->iMonitoredPrev != pPage->idx);
1113 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1114 }
1115 return pPage;
1116 }
1117
1118 /* ignore, no monitoring. */
1119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1120 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1121 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1122 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1123 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1124 break;
1125 default:
1126 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1127 }
1128 }
1129
1130 /* next */
1131 i = pPage->iNext;
1132 } while (i != NIL_PGMPOOL_IDX);
1133#endif
1134 return NULL;
1135}
1136
1137/**
1138 * Enabled write monitoring of a guest page.
1139 *
1140 * @returns VBox status code.
1141 * @retval VINF_SUCCESS on success.
1142 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1143 * @param pPool The pool.
1144 * @param pPage The cached page.
1145 */
1146static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1147{
1148 /*
1149 * Filter out the relevant kinds.
1150 */
1151 switch (pPage->enmKind)
1152 {
1153 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1155 break;
1156
1157 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1158 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1159 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1160 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1161 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1162 /* Nothing to monitor here. */
1163 return VINF_SUCCESS;
1164
1165 case PGMPOOLKIND_ROOT_32BIT_PD:
1166 case PGMPOOLKIND_ROOT_PAE_PD:
1167#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1168 break;
1169#endif
1170 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1171 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1172 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1173 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1174 case PGMPOOLKIND_ROOT_PDPTR:
1175 case PGMPOOLKIND_ROOT_PML4:
1176 default:
1177 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1178 }
1179
1180 /*
1181 * Install handler.
1182 */
1183 int rc;
1184 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1185 if (pPageHead)
1186 {
1187 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1188 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1189 pPage->iMonitoredPrev = pPageHead->idx;
1190 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1191 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1192 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1193 pPageHead->iMonitoredNext = pPage->idx;
1194 rc = VINF_SUCCESS;
1195 }
1196 else
1197 {
1198 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1199 PVM pVM = pPool->CTXSUFF(pVM);
1200 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1201 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1202 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1203 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
1204 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
1205 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
1206 pPool->pszAccessHandler);
1207 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1208 * the heap size should suffice. */
1209 AssertFatalRC(rc);
1210 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1211 rc = VERR_PGM_POOL_CLEARED;
1212 }
1213 pPage->fMonitored = true;
1214 return rc;
1215}
1216
1217
1218/**
1219 * Disables write monitoring of a guest page.
1220 *
1221 * @returns VBox status code.
1222 * @retval VINF_SUCCESS on success.
1223 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1224 * @param pPool The pool.
1225 * @param pPage The cached page.
1226 */
1227static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1228{
1229 /*
1230 * Filter out the relevant kinds.
1231 */
1232 switch (pPage->enmKind)
1233 {
1234 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1235 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1236 break;
1237
1238 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1239 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1240 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1241 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1242 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1243 /* Nothing to monitor here. */
1244 return VINF_SUCCESS;
1245
1246 case PGMPOOLKIND_ROOT_32BIT_PD:
1247 case PGMPOOLKIND_ROOT_PAE_PD:
1248#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1249 break;
1250#endif
1251 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1252 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1253 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1254 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1255 case PGMPOOLKIND_ROOT_PDPTR:
1256 case PGMPOOLKIND_ROOT_PML4:
1257 default:
1258 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1259 }
1260
1261 /*
1262 * Remove the page from the monitored list or uninstall it if last.
1263 */
1264 const PVM pVM = pPool->CTXSUFF(pVM);
1265 int rc;
1266 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1267 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1268 {
1269 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1270 {
1271 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1272 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1273 pNewHead->fCR3Mix = pPage->fCR3Mix;
1274 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1275 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1276 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1277 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pNewHead),
1278 pPool->pszAccessHandler);
1279 AssertFatalRCSuccess(rc);
1280 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1281 }
1282 else
1283 {
1284 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1285 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1286 {
1287 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1288 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1289 }
1290 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1291 rc = VINF_SUCCESS;
1292 }
1293 }
1294 else
1295 {
1296 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1297 AssertFatalRC(rc);
1298 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1299 rc = VERR_PGM_POOL_CLEARED;
1300 }
1301 pPage->fMonitored = false;
1302
1303 /*
1304 * Remove it from the list of modified pages (if in it).
1305 */
1306 pgmPoolMonitorModifiedRemove(pPool, pPage);
1307
1308 return rc;
1309}
1310
1311
1312#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1313/**
1314 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1315 *
1316 * @param pPool The Pool.
1317 * @param pPage A page in the chain.
1318 * @param fCR3Mix The new fCR3Mix value.
1319 */
1320static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1321{
1322 /* current */
1323 pPage->fCR3Mix = fCR3Mix;
1324
1325 /* before */
1326 int16_t idx = pPage->iMonitoredPrev;
1327 while (idx != NIL_PGMPOOL_IDX)
1328 {
1329 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1330 idx = pPool->aPages[idx].iMonitoredPrev;
1331 }
1332
1333 /* after */
1334 idx = pPage->iMonitoredNext;
1335 while (idx != NIL_PGMPOOL_IDX)
1336 {
1337 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1338 idx = pPool->aPages[idx].iMonitoredNext;
1339 }
1340}
1341
1342
1343/**
1344 * Installs or modifies monitoring of a CR3 page (special).
1345 *
1346 * We're pretending the CR3 page is shadowed by the pool so we can use the
1347 * generic mechanisms in detecting chained monitoring. (This also gives us a
1348 * tast of what code changes are required to really pool CR3 shadow pages.)
1349 *
1350 * @returns VBox status code.
1351 * @param pPool The pool.
1352 * @param idxRoot The CR3 (root) page index.
1353 * @param GCPhysCR3 The (new) CR3 value.
1354 */
1355int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1356{
1357 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1358 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1359 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1360 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1361
1362 /*
1363 * The unlikely case where it already matches.
1364 */
1365 if (pPage->GCPhys == GCPhysCR3)
1366 {
1367 Assert(pPage->fMonitored);
1368 return VINF_SUCCESS;
1369 }
1370
1371 /*
1372 * Flush the current monitoring and remove it from the hash.
1373 */
1374 int rc = VINF_SUCCESS;
1375 if (pPage->fMonitored)
1376 {
1377 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1378 rc = pgmPoolMonitorFlush(pPool, pPage);
1379 if (rc == VERR_PGM_POOL_CLEARED)
1380 rc = VINF_SUCCESS;
1381 else
1382 AssertFatalRC(rc);
1383 pgmPoolHashRemove(pPool, pPage);
1384 }
1385
1386 /*
1387 * Monitor the page at the new location and insert it into the hash.
1388 */
1389 pPage->GCPhys = GCPhysCR3;
1390 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1391 if (rc2 != VERR_PGM_POOL_CLEARED)
1392 {
1393 AssertFatalRC(rc2);
1394 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1395 rc = rc2;
1396 }
1397 pgmPoolHashInsert(pPool, pPage);
1398 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1399 return rc;
1400}
1401
1402
1403/**
1404 * Removes the monitoring of a CR3 page (special).
1405 *
1406 * @returns VBox status code.
1407 * @param pPool The pool.
1408 * @param idxRoot The CR3 (root) page index.
1409 */
1410int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1411{
1412 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1413 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1414 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1415 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1416
1417 if (!pPage->fMonitored)
1418 return VINF_SUCCESS;
1419
1420 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1421 int rc = pgmPoolMonitorFlush(pPool, pPage);
1422 if (rc != VERR_PGM_POOL_CLEARED)
1423 AssertFatalRC(rc);
1424 else
1425 rc = VINF_SUCCESS;
1426 pgmPoolHashRemove(pPool, pPage);
1427 Assert(!pPage->fMonitored);
1428 pPage->GCPhys = NIL_RTGCPHYS;
1429 return rc;
1430}
1431#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1432
1433
1434/**
1435 * Inserts the page into the list of modified pages.
1436 *
1437 * @param pPool The pool.
1438 * @param pPage The page.
1439 */
1440void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1441{
1442 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1443 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1444 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1445 && pPool->iModifiedHead != pPage->idx,
1446 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1447 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1448 pPool->iModifiedHead, pPool->cModifiedPages));
1449
1450 pPage->iModifiedNext = pPool->iModifiedHead;
1451 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1452 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1453 pPool->iModifiedHead = pPage->idx;
1454 pPool->cModifiedPages++;
1455#ifdef VBOX_WITH_STATISTICS
1456 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1457 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1458#endif
1459}
1460
1461
1462/**
1463 * Removes the page from the list of modified pages and resets the
1464 * moficiation counter.
1465 *
1466 * @param pPool The pool.
1467 * @param pPage The page which is believed to be in the list of modified pages.
1468 */
1469static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1470{
1471 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1472 if (pPool->iModifiedHead == pPage->idx)
1473 {
1474 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1475 pPool->iModifiedHead = pPage->iModifiedNext;
1476 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1477 {
1478 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1479 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1480 }
1481 pPool->cModifiedPages--;
1482 }
1483 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1484 {
1485 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1486 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1487 {
1488 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1489 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1490 }
1491 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1492 pPool->cModifiedPages--;
1493 }
1494 else
1495 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1496 pPage->cModifications = 0;
1497}
1498
1499
1500/**
1501 * Zaps the list of modified pages, resetting their modification counters in the process.
1502 *
1503 * @param pVM The VM handle.
1504 */
1505void pgmPoolMonitorModifiedClearAll(PVM pVM)
1506{
1507 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1508 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1509
1510 unsigned cPages = 0; NOREF(cPages);
1511 uint16_t idx = pPool->iModifiedHead;
1512 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1513 while (idx != NIL_PGMPOOL_IDX)
1514 {
1515 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1516 idx = pPage->iModifiedNext;
1517 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1518 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1519 pPage->cModifications = 0;
1520 Assert(++cPages);
1521 }
1522 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1523 pPool->cModifiedPages = 0;
1524}
1525
1526
1527/**
1528 * Clear all shadow pages and clear all modification counters.
1529 *
1530 * @param pVM The VM handle.
1531 * @remark Should only be used when monitoring is available, thus placed in
1532 * the PGMPOOL_WITH_MONITORING #ifdef.
1533 */
1534void pgmPoolClearAll(PVM pVM)
1535{
1536 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1537 STAM_PROFILE_START(&pPool->StatClearAll, c);
1538 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1539
1540 /*
1541 * Iterate all the pages until we've encountered all that in use.
1542 * This is simple but not quite optimal solution.
1543 */
1544 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1545 unsigned cLeft = pPool->cUsedPages;
1546 unsigned iPage = pPool->cCurPages;
1547 while (--iPage >= PGMPOOL_IDX_FIRST)
1548 {
1549 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1550 if (pPage->GCPhys != NIL_RTGCPHYS)
1551 {
1552 switch (pPage->enmKind)
1553 {
1554 /*
1555 * We only care about shadow page tables.
1556 */
1557 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1558 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1559 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1560 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1561 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1562 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1563 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1564 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1565 {
1566#ifdef PGMPOOL_WITH_USER_TRACKING
1567 if (pPage->cPresent)
1568#endif
1569 {
1570 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1571 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1572 ASMMemZeroPage(pvShw);
1573 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1574#ifdef PGMPOOL_WITH_USER_TRACKING
1575 pPage->cPresent = 0;
1576 pPage->iFirstPresent = ~0;
1577#endif
1578 }
1579 }
1580 /* fall thru */
1581
1582 default:
1583 Assert(!pPage->cModifications || ++cModifiedPages);
1584 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1585 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1586 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1587 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1588 pPage->cModifications = 0;
1589 break;
1590
1591 }
1592 if (!--cLeft)
1593 break;
1594 }
1595 }
1596
1597 /* swipe the special pages too. */
1598 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1599 {
1600 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1601 if (pPage->GCPhys != NIL_RTGCPHYS)
1602 {
1603 Assert(!pPage->cModifications || ++cModifiedPages);
1604 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1605 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1606 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1607 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1608 pPage->cModifications = 0;
1609 }
1610 }
1611
1612 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1613 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1614 pPool->cModifiedPages = 0;
1615
1616#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1617 /*
1618 * Clear all the GCPhys links and rebuild the phys ext free list.
1619 */
1620 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1621 pRam;
1622 pRam = pRam->CTXSUFF(pNext))
1623 {
1624 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1625 while (iPage-- > 0)
1626 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
1627 }
1628
1629 pPool->iPhysExtFreeHead = 0;
1630 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1631 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1632 for (unsigned i = 0; i < cMaxPhysExts; i++)
1633 {
1634 paPhysExts[i].iNext = i + 1;
1635 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1636 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1637 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1638 }
1639 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1640#endif
1641
1642
1643 pPool->cPresent = 0;
1644 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1645}
1646#endif /* PGMPOOL_WITH_MONITORING */
1647
1648
1649#ifdef PGMPOOL_WITH_USER_TRACKING
1650/**
1651 * Frees up at least one user entry.
1652 *
1653 * @returns VBox status code.
1654 * @retval VINF_SUCCESS if successfully added.
1655 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1656 * @param pPool The pool.
1657 * @param iUser The user index.
1658 */
1659static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1660{
1661 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1662#ifdef PGMPOOL_WITH_CACHE
1663 /*
1664 * Just free cached pages in a braindead fashion.
1665 */
1666 /** @todo walk the age list backwards and free the first with usage. */
1667 int rc = VINF_SUCCESS;
1668 do
1669 {
1670 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1671 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1672 rc = rc2;
1673 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1674 return rc;
1675#else
1676 /*
1677 * Lazy approach.
1678 */
1679 pgmPoolFlushAllInt(pPool);
1680 return VERR_PGM_POOL_FLUSHED;
1681#endif
1682}
1683
1684
1685/**
1686 * Inserts a page into the cache.
1687 *
1688 * This will create user node for the page, insert it into the GCPhys
1689 * hash, and insert it into the age list.
1690 *
1691 * @returns VBox status code.
1692 * @retval VINF_SUCCESS if successfully added.
1693 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1694 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1695 * @param pPool The pool.
1696 * @param pPage The cached page.
1697 * @param GCPhys The GC physical address of the page we're gonna shadow.
1698 * @param iUser The user index.
1699 * @param iUserTable The user table index.
1700 */
1701DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1702{
1703 int rc = VINF_SUCCESS;
1704 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1705
1706 /*
1707 * Find free a user node.
1708 */
1709 uint16_t i = pPool->iUserFreeHead;
1710 if (i == NIL_PGMPOOL_USER_INDEX)
1711 {
1712 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1713 if (VBOX_FAILURE(rc))
1714 return rc;
1715 i = pPool->iUserFreeHead;
1716 }
1717
1718 /*
1719 * Unlink the user node from the free list,
1720 * initialize and insert it into the user list.
1721 */
1722 pPool->iUserFreeHead = pUser[i].iNext;
1723 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1724 pUser[i].iUser = iUser;
1725 pUser[i].iUserTable = iUserTable;
1726 pPage->iUserHead = i;
1727
1728 /*
1729 * Insert into cache and enable monitoring of the guest page if enabled.
1730 *
1731 * Until we implement caching of all levels, including the CR3 one, we'll
1732 * have to make sure we don't try monitor & cache any recursive reuse of
1733 * a monitored CR3 page. Because all windows versions are doing this we'll
1734 * have to be able to do combined access monitoring, CR3 + PT and
1735 * PD + PT (guest PAE).
1736 *
1737 * Update:
1738 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1739 */
1740#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1741# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1742 const bool fCanBeMonitored = true;
1743# else
1744 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1745 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1746 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1747# endif
1748# ifdef PGMPOOL_WITH_CACHE
1749 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1750# endif
1751 if (fCanBeMonitored)
1752 {
1753# ifdef PGMPOOL_WITH_MONITORING
1754 rc = pgmPoolMonitorInsert(pPool, pPage);
1755 if (rc == VERR_PGM_POOL_CLEARED)
1756 {
1757 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1758# ifndef PGMPOOL_WITH_CACHE
1759 pgmPoolMonitorFlush(pPool, pPage);
1760 rc = VERR_PGM_POOL_FLUSHED;
1761# endif
1762 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1763 pUser[i].iNext = pPool->iUserFreeHead;
1764 pUser[i].iUser = NIL_PGMPOOL_IDX;
1765 pPool->iUserFreeHead = i;
1766 }
1767 }
1768# endif
1769#endif /* PGMPOOL_WITH_MONITORING */
1770 return rc;
1771}
1772
1773
1774# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1775/**
1776 * Adds a user reference to a page.
1777 *
1778 * This will
1779 * This will move the page to the head of the
1780 *
1781 * @returns VBox status code.
1782 * @retval VINF_SUCCESS if successfully added.
1783 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1784 * @param pPool The pool.
1785 * @param pPage The cached page.
1786 * @param iUser The user index.
1787 * @param iUserTable The user table.
1788 */
1789static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1790{
1791 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1792
1793# ifdef VBOX_STRICT
1794 /*
1795 * Check that the entry doesn't already exists.
1796 */
1797 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1798 {
1799 uint16_t i = pPage->iUserHead;
1800 do
1801 {
1802 Assert(i < pPool->cMaxUsers);
1803 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1804 i = paUsers[i].iNext;
1805 } while (i != NIL_PGMPOOL_USER_INDEX);
1806 }
1807# endif
1808
1809 /*
1810 * Allocate a user node.
1811 */
1812 uint16_t i = pPool->iUserFreeHead;
1813 if (i == NIL_PGMPOOL_USER_INDEX)
1814 {
1815 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1816 if (VBOX_FAILURE(rc))
1817 return rc;
1818 i = pPool->iUserFreeHead;
1819 }
1820 pPool->iUserFreeHead = paUsers[i].iNext;
1821
1822 /*
1823 * Initialize the user node and insert it.
1824 */
1825 paUsers[i].iNext = pPage->iUserHead;
1826 paUsers[i].iUser = iUser;
1827 paUsers[i].iUserTable = iUserTable;
1828 pPage->iUserHead = i;
1829
1830# ifdef PGMPOOL_WITH_CACHE
1831 /*
1832 * Tell the cache to update its replacement stats for this page.
1833 */
1834 pgmPoolCacheUsed(pPool, pPage);
1835# endif
1836 return VINF_SUCCESS;
1837}
1838# endif /* PGMPOOL_WITH_CACHE */
1839
1840
1841/**
1842 * Frees a user record associated with a page.
1843 *
1844 * This does not clear the entry in the user table, it simply replaces the
1845 * user record to the chain of free records.
1846 *
1847 * @param pPool The pool.
1848 * @param HCPhys The HC physical address of the shadow page.
1849 * @param iUser The shadow page pool index of the user table.
1850 * @param iUserTable The index into the user table (shadowed).
1851 */
1852static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1853{
1854 /*
1855 * Unlink and free the specified user entry.
1856 */
1857 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1858
1859 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1860 uint16_t i = pPage->iUserHead;
1861 if ( i != NIL_PGMPOOL_USER_INDEX
1862 && paUsers[i].iUser == iUser
1863 && paUsers[i].iUserTable == iUserTable)
1864 {
1865 pPage->iUserHead = paUsers[i].iNext;
1866
1867 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1868 paUsers[i].iNext = pPool->iUserFreeHead;
1869 pPool->iUserFreeHead = i;
1870 return;
1871 }
1872
1873 /* General: Linear search. */
1874 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1875 while (i != NIL_PGMPOOL_USER_INDEX)
1876 {
1877 if ( paUsers[i].iUser == iUser
1878 && paUsers[i].iUserTable == iUserTable)
1879 {
1880 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1881 paUsers[iPrev].iNext = paUsers[i].iNext;
1882 else
1883 pPage->iUserHead = paUsers[i].iNext;
1884
1885 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1886 paUsers[i].iNext = pPool->iUserFreeHead;
1887 pPool->iUserFreeHead = i;
1888 return;
1889 }
1890 iPrev = i;
1891 i = paUsers[i].iNext;
1892 }
1893
1894 /* Fatal: didn't find it */
1895 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1896 iUser, iUserTable, pPage->GCPhys));
1897}
1898
1899
1900/**
1901 * Gets the entry size of a shadow table.
1902 *
1903 * @param enmKind
1904 * The kind of page.
1905 *
1906 * @returns The size of the entry in bytes. That is, 4 or 8.
1907 * @returns If the kind is not for a table, an assertion is raised and 0 is
1908 * returned.
1909 */
1910DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1911{
1912 switch (enmKind)
1913 {
1914 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1915 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1916 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1917 case PGMPOOLKIND_ROOT_32BIT_PD:
1918 return 4;
1919
1920 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1921 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1922 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1923 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1924 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1925 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1926 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1927 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1928 case PGMPOOLKIND_ROOT_PAE_PD:
1929 case PGMPOOLKIND_ROOT_PDPTR:
1930 case PGMPOOLKIND_ROOT_PML4:
1931 return 8;
1932
1933 default:
1934 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1935 }
1936}
1937
1938
1939/**
1940 * Gets the entry size of a guest table.
1941 *
1942 * @param enmKind
1943 * The kind of page.
1944 *
1945 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1946 * @returns If the kind is not for a table, an assertion is raised and 0 is
1947 * returned.
1948 */
1949DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1950{
1951 switch (enmKind)
1952 {
1953 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1954 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1955 case PGMPOOLKIND_ROOT_32BIT_PD:
1956 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1957 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1958 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1959 return 4;
1960
1961 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1962 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1963 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1964 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1965 case PGMPOOLKIND_ROOT_PAE_PD:
1966 case PGMPOOLKIND_ROOT_PDPTR:
1967 case PGMPOOLKIND_ROOT_PML4:
1968 return 8;
1969
1970 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1971 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1972 /** @todo can we return 0? (nobody is calling this...) */
1973 return 0;
1974
1975 default:
1976 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1977 }
1978}
1979
1980
1981#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1982/**
1983 * Scans one shadow page table for mappings of a physical page.
1984 *
1985 * @param pVM The VM handle.
1986 * @param pHCPhys The aHCPhys ramrange entry in question.
1987 * @param iShw The shadow page table.
1988 * @param cRefs The number of references made in that PT.
1989 */
1990static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
1991{
1992 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
1993 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1994
1995 /*
1996 * Assert sanity.
1997 */
1998 Assert(cRefs == 1);
1999 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2000 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2001
2002 /*
2003 * Then, clear the actual mappings to the page in the shadow PT.
2004 */
2005 switch (pPage->enmKind)
2006 {
2007 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2008 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2009 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2010 {
2011 const uint32_t u32 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2012 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2013 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2014 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2015 {
2016 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2017 pPT->a[i].u = 0;
2018 cRefs--;
2019 if (!cRefs)
2020 return;
2021 }
2022#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2023 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2024 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2025 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2026 {
2027 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2028 pPT->a[i].u = 0;
2029 }
2030#endif
2031 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2032 break;
2033 }
2034
2035 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2036 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2037 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2038 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2039 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2040 {
2041 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2042 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2043 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2044 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2045 {
2046 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2047 pPT->a[i].u = 0;
2048 cRefs--;
2049 if (!cRefs)
2050 return;
2051 }
2052#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2053 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2054 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2055 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2056 {
2057 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2058 pPT->a[i].u = 0;
2059 }
2060#endif
2061 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2062 break;
2063 }
2064
2065 default:
2066 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2067 }
2068}
2069
2070
2071/**
2072 * Scans one shadow page table for mappings of a physical page.
2073 *
2074 * @param pVM The VM handle.
2075 * @param pHCPhys The aHCPhys ramrange entry in question.
2076 * @param iShw The shadow page table.
2077 * @param cRefs The number of references made in that PT.
2078 */
2079void pgmPoolTrackFlushGCPhysPT(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
2080{
2081 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2082 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
2083 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2084 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, iShw, cRefs);
2085 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2086 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2087}
2088
2089
2090/**
2091 * Flushes a list of shadow page tables mapping the same physical page.
2092 *
2093 * @param pVM The VM handle.
2094 * @param pHCPhys The aHCPhys ramrange entry in question.
2095 * @param iPhysExt The physical cross reference extent list to flush.
2096 */
2097void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iPhysExt)
2098{
2099 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2100 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2101 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pHCPhys=%p:{%RHp} iPhysExt\n", pHCPhys, *pHCPhys, iPhysExt));
2102
2103 const uint16_t iPhysExtStart = iPhysExt;
2104 PPGMPOOLPHYSEXT pPhysExt;
2105 do
2106 {
2107 Assert(iPhysExt < pPool->cMaxPhysExts);
2108 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2109 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2110 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2111 {
2112 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, pPhysExt->aidx[i], 1);
2113 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2114 }
2115
2116 /* next */
2117 iPhysExt = pPhysExt->iNext;
2118 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2119
2120 /* insert the list into the free list and clear the ram range entry. */
2121 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2122 pPool->iPhysExtFreeHead = iPhysExtStart;
2123 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2124
2125 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2126}
2127#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2128
2129
2130/**
2131 * Scans all shadow page tables for mappings of a physical page.
2132 *
2133 * This may be slow, but it's most likely more efficient than cleaning
2134 * out the entire page pool / cache.
2135 *
2136 * @returns VBox status code.
2137 * @retval VINF_SUCCESS if all references has been successfully cleared.
2138 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2139 * a page pool cleaning.
2140 *
2141 * @param pVM The VM handle.
2142 * @param pHCPhys The aHCPhys ramrange entry in question.
2143 */
2144int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PRTHCPHYS pHCPhys)
2145{
2146 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2147 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2148 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d *pHCPhys=%RHp\n",
2149 pPool->cUsedPages, pPool->cPresent, *pHCPhys));
2150
2151#if 1
2152 /*
2153 * There is a limit to what makes sense.
2154 */
2155 if (pPool->cPresent > 1024)
2156 {
2157 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2158 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2159 return VINF_PGM_GCPHYS_ALIASED;
2160 }
2161#endif
2162
2163 /*
2164 * Iterate all the pages until we've encountered all that in use.
2165 * This is simple but not quite optimal solution.
2166 */
2167 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2168 const uint32_t u32 = u64;
2169 unsigned cLeft = pPool->cUsedPages;
2170 unsigned iPage = pPool->cCurPages;
2171 while (--iPage >= PGMPOOL_IDX_FIRST)
2172 {
2173 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2174 if (pPage->GCPhys != NIL_RTGCPHYS)
2175 {
2176 switch (pPage->enmKind)
2177 {
2178 /*
2179 * We only care about shadow page tables.
2180 */
2181 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2182 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2183 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2184 {
2185 unsigned cPresent = pPage->cPresent;
2186 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2187 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2188 if (pPT->a[i].n.u1Present)
2189 {
2190 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2191 {
2192 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2193 pPT->a[i].u = 0;
2194 }
2195 if (!--cPresent)
2196 break;
2197 }
2198 break;
2199 }
2200
2201 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2202 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2203 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2204 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2205 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2206 {
2207 unsigned cPresent = pPage->cPresent;
2208 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2209 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2210 if (pPT->a[i].n.u1Present)
2211 {
2212 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2213 {
2214 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2215 pPT->a[i].u = 0;
2216 }
2217 if (!--cPresent)
2218 break;
2219 }
2220 break;
2221 }
2222 }
2223 if (!--cLeft)
2224 break;
2225 }
2226 }
2227
2228 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2229 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2230 return VINF_SUCCESS;
2231}
2232
2233
2234/**
2235 * Clears the user entry in a user table.
2236 *
2237 * This is used to remove all references to a page when flushing it.
2238 */
2239static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2240{
2241 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2242 Assert(pUser->iUser < pPool->cCurPages);
2243
2244 /*
2245 * Map the user page.
2246 */
2247 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2248 union
2249 {
2250 uint64_t *pau64;
2251 uint32_t *pau32;
2252 } u;
2253 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2254
2255#ifdef VBOX_STRICT
2256 /*
2257 * Some sanity checks.
2258 */
2259 switch (pUserPage->enmKind)
2260 {
2261 case PGMPOOLKIND_ROOT_32BIT_PD:
2262 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2263 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2264 break;
2265 case PGMPOOLKIND_ROOT_PAE_PD:
2266 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2267 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2268 break;
2269 case PGMPOOLKIND_ROOT_PDPTR:
2270 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2271 Assert(pUser->iUserTable < 4);
2272 break;
2273 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2274 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2275 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2276 break;
2277 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2278 case PGMPOOLKIND_ROOT_PML4:
2279 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2280 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2281 break;
2282 default:
2283 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2284 break;
2285 }
2286#endif /* VBOX_STRICT */
2287
2288 /*
2289 * Clear the entry in the user page.
2290 */
2291 switch (pUserPage->enmKind)
2292 {
2293 /* 32-bit entries */
2294 case PGMPOOLKIND_ROOT_32BIT_PD:
2295 u.pau32[pUser->iUserTable] = 0;
2296 break;
2297
2298 /* 64-bit entries */
2299 case PGMPOOLKIND_ROOT_PAE_PD:
2300 case PGMPOOLKIND_ROOT_PDPTR:
2301 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2302 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2303 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2304 case PGMPOOLKIND_ROOT_PML4:
2305 u.pau64[pUser->iUserTable] = 0;
2306 break;
2307
2308 default:
2309 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2310 }
2311}
2312
2313
2314/**
2315 * Clears all users of a page.
2316 */
2317static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2318{
2319 /*
2320 * Free all the user records.
2321 */
2322 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2323 uint16_t i = pPage->iUserHead;
2324 while (i != NIL_PGMPOOL_USER_INDEX)
2325 {
2326 /* Clear enter in user table. */
2327 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2328
2329 /* Free it. */
2330 const uint16_t iNext = paUsers[i].iNext;
2331 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2332 paUsers[i].iNext = pPool->iUserFreeHead;
2333 pPool->iUserFreeHead = i;
2334
2335 /* Next. */
2336 i = iNext;
2337 }
2338 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2339}
2340
2341
2342#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2343/**
2344 * Allocates a new physical cross reference extent.
2345 *
2346 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2347 * @param pVM The VM handle.
2348 * @param piPhysExt Where to store the phys ext index.
2349 */
2350PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2351{
2352 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2353 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2354 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2355 {
2356 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2357 return NULL;
2358 }
2359 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2360 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2361 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2362 *piPhysExt = iPhysExt;
2363 return pPhysExt;
2364}
2365
2366
2367/**
2368 * Frees a physical cross reference extent.
2369 *
2370 * @param pVM The VM handle.
2371 * @param iPhysExt The extent to free.
2372 */
2373void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2374{
2375 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2376 Assert(iPhysExt < pPool->cMaxPhysExts);
2377 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2378 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2379 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2380 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2381 pPool->iPhysExtFreeHead = iPhysExt;
2382}
2383
2384
2385/**
2386 * Frees a physical cross reference extent.
2387 *
2388 * @param pVM The VM handle.
2389 * @param iPhysExt The extent to free.
2390 */
2391void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2392{
2393 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2394
2395 const uint16_t iPhysExtStart = iPhysExt;
2396 PPGMPOOLPHYSEXT pPhysExt;
2397 do
2398 {
2399 Assert(iPhysExt < pPool->cMaxPhysExts);
2400 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2401 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2402 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2403
2404 /* next */
2405 iPhysExt = pPhysExt->iNext;
2406 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2407
2408 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2409 pPool->iPhysExtFreeHead = iPhysExtStart;
2410}
2411
2412/**
2413 * Insert a reference into a list of physical cross reference extents.
2414 *
2415 * @returns The new ram range flags (top 16-bits).
2416 *
2417 * @param pVM The VM handle.
2418 * @param iPhysExt The physical extent index of the list head.
2419 * @param iShwPT The shadow page table index.
2420 *
2421 */
2422static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2423{
2424 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2425 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2426
2427 /* special common case. */
2428 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2429 {
2430 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2431 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2432 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2433 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2434 }
2435
2436 /* general treatment. */
2437 const uint16_t iPhysExtStart = iPhysExt;
2438 unsigned cMax = 15;
2439 for (;;)
2440 {
2441 Assert(iPhysExt < pPool->cMaxPhysExts);
2442 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2443 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2444 {
2445 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2446 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2447 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2448 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2449 }
2450 if (!--cMax)
2451 {
2452 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2453 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2454 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2455 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2456 }
2457 }
2458
2459 /* add another extent to the list. */
2460 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2461 if (!pNew)
2462 {
2463 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2464 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2465 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2466 }
2467 pNew->iNext = iPhysExtStart;
2468 pNew->aidx[0] = iShwPT;
2469 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2470 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2471}
2472
2473
2474/**
2475 * Add a reference to guest physical page where extents are in use.
2476 *
2477 * @returns The new ram range flags (top 16-bits).
2478 *
2479 * @param pVM The VM handle.
2480 * @param u16 The ram range flags (top 16-bits).
2481 * @param iShwPT The shadow page table index.
2482 */
2483uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2484{
2485 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2486 {
2487 /*
2488 * Convert to extent list.
2489 */
2490 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2491 uint16_t iPhysExt;
2492 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2493 if (pPhysExt)
2494 {
2495 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2496 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2497 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2498 pPhysExt->aidx[1] = iShwPT;
2499 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2500 }
2501 else
2502 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2503 }
2504 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2505 {
2506 /*
2507 * Insert into the extent list.
2508 */
2509 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2510 }
2511 else
2512 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2513 return u16;
2514}
2515
2516
2517/**
2518 * Clear references to guest physical memory.
2519 *
2520 * @param pPool The pool.
2521 * @param pPage The page.
2522 * @param pHCPhys Pointer to the aHCPhys entry in the ram range.
2523 */
2524void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PRTHCPHYS pHCPhys)
2525{
2526 const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
2527 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2528
2529 uint16_t iPhysExt = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2530 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2531 {
2532 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2533 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2534 do
2535 {
2536 Assert(iPhysExt < pPool->cMaxPhysExts);
2537
2538 /*
2539 * Look for the shadow page and check if it's all freed.
2540 */
2541 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2542 {
2543 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2544 {
2545 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2546
2547 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2548 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2549 {
2550 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2551 return;
2552 }
2553
2554 /* we can free the node. */
2555 PVM pVM = pPool->CTXSUFF(pVM);
2556 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2557 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2558 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2559 {
2560 /* lonely node */
2561 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2562 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d lonely\n", *pHCPhys, pPage->idx));
2563 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2564 }
2565 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2566 {
2567 /* head */
2568 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d head\n", *pHCPhys, pPage->idx));
2569 *pHCPhys = (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK)
2570 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2571 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2572 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2573 }
2574 else
2575 {
2576 /* in list */
2577 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2578 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2579 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2580 }
2581 iPhysExt = iPhysExtNext;
2582 return;
2583 }
2584 }
2585
2586 /* next */
2587 iPhysExtPrev = iPhysExt;
2588 iPhysExt = paPhysExts[iPhysExt].iNext;
2589 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2590
2591 AssertFatalMsgFailed(("not-found! cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2592 }
2593 else /* nothing to do */
2594 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64\n", *pHCPhys));
2595}
2596
2597
2598
2599/**
2600 * Clear references to guest physical memory.
2601 *
2602 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2603 * is assumed to be correct, so the linear search can be skipped and we can assert
2604 * at an earlier point.
2605 *
2606 * @param pPool The pool.
2607 * @param pPage The page.
2608 * @param HCPhys The host physical address corresponding to the guest page.
2609 * @param GCPhys The guest physical address corresponding to HCPhys.
2610 */
2611static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2612{
2613 /*
2614 * Walk range list.
2615 */
2616 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2617 while (pRam)
2618 {
2619 RTGCPHYS off = GCPhys - pRam->GCPhys;
2620 if (off < pRam->cb)
2621 {
2622 /* does it match? */
2623 const unsigned iPage = off >> PAGE_SHIFT;
2624 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2625 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2626 {
2627 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2628 return;
2629 }
2630 break;
2631 }
2632 pRam = CTXSUFF(pRam->pNext);
2633 }
2634 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2635}
2636
2637
2638/**
2639 * Clear references to guest physical memory.
2640 *
2641 * @param pPool The pool.
2642 * @param pPage The page.
2643 * @param HCPhys The host physical address corresponding to the guest page.
2644 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
2645 */
2646static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2647{
2648 /*
2649 * Walk range list.
2650 */
2651 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2652 while (pRam)
2653 {
2654 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2655 if (off < pRam->cb)
2656 {
2657 /* does it match? */
2658 const unsigned iPage = off >> PAGE_SHIFT;
2659 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2660 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2661 {
2662 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2663 return;
2664 }
2665 break;
2666 }
2667 pRam = CTXSUFF(pRam->pNext);
2668 }
2669
2670 /*
2671 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2672 */
2673 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2674 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2675 while (pRam)
2676 {
2677 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2678 while (iPage-- > 0)
2679 {
2680 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2681 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2682 {
2683 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2684 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2685 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2686 return;
2687 }
2688 }
2689 pRam = CTXSUFF(pRam->pNext);
2690 }
2691
2692 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2693}
2694
2695
2696/**
2697 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2698 *
2699 * @param pPool The pool.
2700 * @param pPage The page.
2701 * @param pShwPT The shadow page table (mapping of the page).
2702 * @param pGstPT The guest page table.
2703 */
2704DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2705{
2706 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2707 if (pShwPT->a[i].n.u1Present)
2708 {
2709 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2710 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2711 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2712 if (!--pPage->cPresent)
2713 break;
2714 }
2715}
2716
2717
2718/**
2719 * Clear references to guest physical memory in a PAE / 32-bit page table.
2720 *
2721 * @param pPool The pool.
2722 * @param pPage The page.
2723 * @param pShwPT The shadow page table (mapping of the page).
2724 * @param pGstPT The guest page table (just a half one).
2725 */
2726DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2727{
2728 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2729 if (pShwPT->a[i].n.u1Present)
2730 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2731}
2732
2733
2734/**
2735 * Clear references to guest physical memory in a PAE / PAE page table.
2736 *
2737 * @param pPool The pool.
2738 * @param pPage The page.
2739 * @param pShwPT The shadow page table (mapping of the page).
2740 * @param pGstPT The guest page table.
2741 */
2742DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2743{
2744 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2745 if (pShwPT->a[i].n.u1Present)
2746 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2747}
2748
2749
2750/**
2751 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2752 *
2753 * @param pPool The pool.
2754 * @param pPage The page.
2755 * @param pShwPT The shadow page table (mapping of the page).
2756 */
2757DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2758{
2759 RTGCPHYS GCPhys = pPage->GCPhys;
2760 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2761 if (pShwPT->a[i].n.u1Present)
2762 {
2763 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2764 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2765 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2766 }
2767}
2768
2769
2770/**
2771 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2772 *
2773 * @param pPool The pool.
2774 * @param pPage The page.
2775 * @param pShwPT The shadow page table (mapping of the page).
2776 */
2777DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2778{
2779 RTGCPHYS GCPhys = pPage->GCPhys;
2780 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2781 if (pShwPT->a[i].n.u1Present)
2782 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2783}
2784#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2785
2786/**
2787 * Clear references to shadowed pages in a PAE page directory.
2788 *
2789 * @param pPool The pool.
2790 * @param pPage The page.
2791 * @param pShwPD The shadow page directory (mapping of the page).
2792 */
2793DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2794{
2795 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2796 {
2797 if (pShwPD->a[i].n.u1Present)
2798 {
2799 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2800 if (pSubPage)
2801 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2802 else
2803 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2804 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2805 }
2806 }
2807}
2808
2809
2810/**
2811 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2812 *
2813 * @param pPool The pool.
2814 * @param pPage The page.
2815 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2816 */
2817DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2818{
2819 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2820 {
2821 if (pShwPdPtr->a[i].n.u1Present)
2822 {
2823 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2824 if (pSubPage)
2825 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2826 else
2827 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2828 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2829 }
2830 }
2831}
2832
2833
2834/**
2835 * Clears all references made by this page.
2836 *
2837 * This includes other shadow pages and GC physical addresses.
2838 *
2839 * @param pPool The pool.
2840 * @param pPage The page.
2841 */
2842static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2843{
2844 /*
2845 * Map the shadow page and take action according to the page kind.
2846 */
2847 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2848 switch (pPage->enmKind)
2849 {
2850#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2851 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2852 {
2853 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2854 void *pvGst;
2855 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2856 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2857 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2858 break;
2859 }
2860
2861 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2862 {
2863 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2864 void *pvGst;
2865 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2866 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2867 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2868 break;
2869 }
2870
2871 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2872 {
2873 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2874 void *pvGst;
2875 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2876 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2877 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2878 break;
2879 }
2880
2881 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2882 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2883 {
2884 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2885 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2886 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2887 break;
2888 }
2889
2890 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2891 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2892 {
2893 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2894 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2895 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2896 break;
2897 }
2898
2899#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2900 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2901 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2902 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2904 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2905 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2906 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2907 break;
2908#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2909
2910 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2911 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2912 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2913 break;
2914
2915 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2916 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2917 break;
2918
2919 default:
2920 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2921 }
2922
2923 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
2924 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2925 ASMMemZeroPage(pvShw);
2926 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2927 pPage->fZeroed = true;
2928}
2929#endif /* PGMPOOL_WITH_USER_TRACKING */
2930
2931
2932/**
2933 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2934 *
2935 * @param pPool The pool.
2936 */
2937static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2938{
2939 /*
2940 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2941 */
2942 Assert(NIL_PGMPOOL_IDX == 0);
2943 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2944 {
2945 /*
2946 * Get the page address.
2947 */
2948 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2949 union
2950 {
2951 uint64_t *pau64;
2952 uint32_t *pau32;
2953 } u;
2954 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2955
2956 /*
2957 * Mark stuff not present.
2958 */
2959 switch (pPage->enmKind)
2960 {
2961 case PGMPOOLKIND_ROOT_32BIT_PD:
2962 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2963 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2964 u.pau32[iPage] = 0;
2965 break;
2966
2967 case PGMPOOLKIND_ROOT_PAE_PD:
2968 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2969 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2970 u.pau64[iPage] = 0;
2971 break;
2972
2973 case PGMPOOLKIND_ROOT_PML4:
2974 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2975 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2976 u.pau64[iPage] = 0;
2977 break;
2978
2979 case PGMPOOLKIND_ROOT_PDPTR:
2980 /* Not root of shadowed pages currently, ignore it. */
2981 break;
2982 }
2983 }
2984
2985 /*
2986 * Paranoia (to be removed), flag a global CR3 sync.
2987 */
2988 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
2989}
2990
2991
/**
 * Flushes the entire cache.
 *
 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
 * and execute this CR3 flush.
 *
 * All pool pages are returned to the free list; user records, phys-ext
 * records, the modified list, the GCPhys hash and the age list are all
 * reset; finally the special root pages are wiped and the monitored ones
 * re-registered/re-hashed.
 *
 * @param   pPool   The pool.
 */
static void pgmPoolFlushAllInt(PPGMPOOL pPool)
{
    STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
    LogFlow(("pgmPoolFlushAllInt:\n"));

    /*
     * If there are no pages in the pool, there is nothing to do.
     */
    if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
    {
        STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
        return;
    }

    /*
     * Nuke the free list and reinsert all pages into it.
     * (Walking backwards lets each page's iNext simply point at the
     * following index; the last link is terminated after the loop.)
     */
    for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];

#ifdef IN_RING3
        Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
#endif
#ifdef PGMPOOL_WITH_MONITORING
        /* Deregister write monitoring and unlink from the modified/monitored chains. */
        if (pPage->fMonitored)
            pgmPoolMonitorFlush(pPool, pPage);
        pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
        pPage->cModifications = 0;
#endif
        pPage->GCPhys = NIL_RTGCPHYS;
        pPage->enmKind = PGMPOOLKIND_FREE;
        Assert(pPage->idx == i);
        pPage->iNext = i + 1;
        pPage->fZeroed = false;       /* This could probably be optimized, but better safe than sorry. */
        pPage->fSeenNonGlobal = false;
        pPage->fMonitored= false;
        pPage->fCached = false;
        pPage->fReusedFlushPending = false;
        pPage->fCR3Mix = false;
#ifdef PGMPOOL_WITH_USER_TRACKING
        pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
#endif
#ifdef PGMPOOL_WITH_CACHE
        pPage->iAgeNext = NIL_PGMPOOL_IDX;
        pPage->iAgePrev = NIL_PGMPOOL_IDX;
#endif
    }
    /* Terminate the free list built above and publish it. */
    pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
    pPool->iFreeHead = PGMPOOL_IDX_FIRST;
    pPool->cUsedPages = 0;

#ifdef PGMPOOL_WITH_USER_TRACKING
    /*
     * Zap and reinitialize the user records (free-list of all entries).
     */
    pPool->cPresent = 0;
    pPool->iUserFreeHead = 0;
    PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
    const unsigned cMaxUsers = pPool->cMaxUsers;
    for (unsigned i = 0; i < cMaxUsers; i++)
    {
        paUsers[i].iNext = i + 1;
        paUsers[i].iUser = NIL_PGMPOOL_IDX;
        paUsers[i].iUserTable = 0xfffe;
    }
    paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
#endif

#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
    /*
     * Clear all the GCPhys links and rebuild the phys ext free list.
     */
    for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
         pRam;
         pRam = pRam->CTXSUFF(pNext))
    {
        /* Strip the reference-tracking bits from every page in the range. */
        unsigned iPage = pRam->cb >> PAGE_SHIFT;
        while (iPage-- > 0)
            pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
    }

    pPool->iPhysExtFreeHead = 0;
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
    const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
    for (unsigned i = 0; i < cMaxPhysExts; i++)
    {
        paPhysExts[i].iNext = i + 1;
        paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
    }
    paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
#endif

#ifdef PGMPOOL_WITH_MONITORING
    /*
     * Just zap the modified list.
     */
    pPool->cModifiedPages = 0;
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;
#endif

#ifdef PGMPOOL_WITH_CACHE
    /*
     * Clear the GCPhys hash and the age list.
     */
    for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
        pPool->aiHash[i] = NIL_PGMPOOL_IDX;
    pPool->iAgeHead = NIL_PGMPOOL_IDX;
    pPool->iAgeTail = NIL_PGMPOOL_IDX;
#endif

    /*
     * Flush all the special root pages.
     * Reinsert active pages into the hash and ensure monitoring chains are correct.
     */
    pgmPoolFlushAllSpecialRoots(pPool);
    for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        pPage->iNext = NIL_PGMPOOL_IDX;
#ifdef PGMPOOL_WITH_MONITORING
        pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPage->cModifications = 0;
        /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
        pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
        if (pPage->fMonitored)
        {
            /* NOTE(review): presumably this re-points the physical access handler's
               user argument at this page after the monitoring chains above were
               reset - confirm against PGMHandlerPhysicalChangeCallbacks docs. */
            PVM pVM = pPool->CTXSUFF(pVM);
            int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                       pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
                                                       pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
                                                       pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
                                                       pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
# ifdef PGMPOOL_WITH_CACHE
            pgmPoolHashInsert(pPool, pPage);
# endif
        }
#endif
#ifdef PGMPOOL_WITH_USER_TRACKING
        Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
#endif
#ifdef PGMPOOL_WITH_CACHE
        Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
        Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
#endif
    }

    STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
}
3157
3158
3159/**
3160 * Flushes a pool page.
3161 *
3162 * This moves the page to the free list after removing all user references to it.
3163 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3164 *
3165 * @returns VBox status code.
3166 * @retval VINF_SUCCESS on success.
3167 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3168 * @param pPool The pool.
3169 * @param HCPhys The HC physical address of the shadow page.
3170 */
3171int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3172{
3173 int rc = VINF_SUCCESS;
3174 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3175 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3176 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3177
3178 /*
3179 * Quietly reject any attempts at flushing any of the special root pages.
3180 */
3181 if (pPage->idx < PGMPOOL_IDX_FIRST)
3182 {
3183 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3184 return VINF_SUCCESS;
3185 }
3186
3187 /*
3188 * Mark the page as being in need of a ASMMemZeroPage().
3189 */
3190 pPage->fZeroed = false;
3191
3192#ifdef PGMPOOL_WITH_USER_TRACKING
3193 /*
3194 * Clear the page.
3195 */
3196 pgmPoolTrackClearPageUsers(pPool, pPage);
3197 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3198 pgmPoolTrackDeref(pPool, pPage);
3199 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3200#endif
3201
3202#ifdef PGMPOOL_WITH_CACHE
3203 /*
3204 * Flush it from the cache.
3205 */
3206 pgmPoolCacheFlushPage(pPool, pPage);
3207#endif /* PGMPOOL_WITH_CACHE */
3208
3209#ifdef PGMPOOL_WITH_MONITORING
3210 /*
3211 * Deregistering the monitoring.
3212 */
3213 if (pPage->fMonitored)
3214 rc = pgmPoolMonitorFlush(pPool, pPage);
3215#endif
3216
3217 /*
3218 * Free the page.
3219 */
3220 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3221 pPage->iNext = pPool->iFreeHead;
3222 pPool->iFreeHead = pPage->idx;
3223 pPage->enmKind = PGMPOOLKIND_FREE;
3224 pPage->GCPhys = NIL_RTGCPHYS;
3225 pPage->fReusedFlushPending = false;
3226
3227 pPool->cUsedPages--;
3228 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3229 return rc;
3230}
3231
3232
/**
 * Frees a usage of a pool page.
 *
 * The caller is responsible to updating the user table so that it no longer
 * references the shadow page.
 *
 * @param   pPool       The pool.
 * @param   pPage       The shadow page whose usage is being freed.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 */
void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
{
    STAM_PROFILE_START(&pPool->StatFree, a);
    LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
             pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
    Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
#ifdef PGMPOOL_WITH_USER_TRACKING
    /* Drop this specific user record. */
    pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
#endif
#ifdef PGMPOOL_WITH_CACHE
    /* Cached pages are kept around for reuse; only flush uncached ones. */
    if (!pPage->fCached)
#endif
        pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
    STAM_PROFILE_STOP(&pPool->StatFree, a);
}
3259
3260
/**
 * Makes sure there is at least one free page available in the pool.
 *
 * First tries to grow the pool if it hasn't reached its maximum size yet;
 * otherwise frees a page by evicting one from the cache (or, without the
 * cache, by flushing the whole pool).
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_FLUSHED if the pool was flushed.
 *
 * @param   pPool       The pool.
 * @param   iUser       The user of the page.
 */
static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
{
    LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));

    /*
     * If the pool isn't full grown yet, expand it.
     */
    if (pPool->cCurPages < pPool->cMaxPages)
    {
        STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
#ifdef IN_RING3
        int rc = PGMR3PoolGrow(pPool->pVMHC);
#else
        /* Growing has to be done in ring-3; make a call-host request. */
        int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
#endif
        if (VBOX_FAILURE(rc))
            return rc;
        STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
        if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
            return VINF_SUCCESS;
    }

#ifdef PGMPOOL_WITH_CACHE
    /*
     * Free one cached page.
     */
    return pgmPoolCacheFreeOne(pPool, iUser);
#else
    /*
     * Flush the pool.
     * If we have tracking enabled, it should be possible to come up with
     * a cheap replacement strategy...
     */
    pgmPoolFlushAllInt(pPool);
    return VERR_PGM_POOL_FLUSHED;
#endif
}
3308
3309
/**
 * Allocates a page from the pool.
 *
 * This page may actually be a cached page and not in need of any processing
 * on the callers part.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if a NEW page was allocated.
 * @retval  VINF_PGM_CACHED_PAGE if a CACHED page was returned.
 * @retval  VERR_PGM_POOL_FLUSHED if the pool was flushed.
 * @param   pVM         The VM handle.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 *                      For 4MB and 2MB PD entries, it's the first address the
 *                      shadow PT is covering.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page. NULL is stored here on failure.
 */
int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
    LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));

    *ppPage = NULL;

#ifdef PGMPOOL_WITH_CACHE
    /*
     * Try satisfy the request from the cache first.
     */
    if (pPool->fCacheEnabled)
    {
        int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
        if (VBOX_SUCCESS(rc2))
        {
            STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
            LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
            return rc2;
        }
    }
#endif

    /*
     * Allocate a new one.
     */
    int rc = VINF_SUCCESS;
    uint16_t iNew = pPool->iFreeHead;
    if (iNew == NIL_PGMPOOL_IDX)
    {
        rc = pgmPoolMakeMoreFreePages(pPool, iUser);
        if (VBOX_FAILURE(rc))
        {
            if (rc != VERR_PGM_POOL_CLEARED)
            {
                Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
                STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
                return rc;
            }
            /* A light weight pool flush happened: continue with the allocation
               (free pages exist again) but report the flush to the caller. */
            rc = VERR_PGM_POOL_FLUSHED;
        }
        iNew = pPool->iFreeHead;
        AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
    }

    /* unlink the free head */
    PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
    pPool->iFreeHead = pPage->iNext;
    pPage->iNext = NIL_PGMPOOL_IDX;

    /*
     * Initialize it.
     */
    pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
    pPage->enmKind = enmKind;
    pPage->GCPhys = GCPhys;
    pPage->fSeenNonGlobal = false;      /* Set this to 'true' to disable this feature. */
    pPage->fMonitored = false;
    pPage->fCached = false;
    pPage->fReusedFlushPending = false;
    pPage->fCR3Mix = false;
#ifdef PGMPOOL_WITH_MONITORING
    pPage->cModifications = 0;
    pPage->iModifiedNext = NIL_PGMPOOL_IDX;
    pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
#endif
#ifdef PGMPOOL_WITH_USER_TRACKING
    pPage->cPresent = 0;
    pPage->iFirstPresent = ~0;

    /*
     * Insert into the tracking and cache. If this fails, free the page.
     */
    int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
    if (VBOX_FAILURE(rc3))
    {
        if (rc3 != VERR_PGM_POOL_CLEARED)
        {
            /* Genuine failure: undo the allocation and put the page back on the free list. */
            pPool->cUsedPages--;
            pPage->enmKind = PGMPOOLKIND_FREE;
            pPage->GCPhys = NIL_RTGCPHYS;
            pPage->iNext = pPool->iFreeHead;
            pPool->iFreeHead = pPage->idx;
            STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
            Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
            return rc3;
        }
        rc = VERR_PGM_POOL_FLUSHED;
    }
#endif /* PGMPOOL_WITH_USER_TRACKING */

    /*
     * Commit the allocation, clear the page and return.
     */
#ifdef VBOX_WITH_STATISTICS
    if (pPool->cUsedPages > pPool->cUsedPagesHigh)
        pPool->cUsedPagesHigh = pPool->cUsedPages;
#endif

    /* Skip the zeroing if the page is already known to be zeroed. */
    if (!pPage->fZeroed)
    {
        STAM_PROFILE_START(&pPool->StatZeroPage, z);
        void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
        ASMMemZeroPage(pv);
        STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
    }

    *ppPage = pPage;
    LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
             rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
    STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
    return rc;
}
3440
3441
3442/**
3443 * Frees a usage of a pool page.
3444 *
3445 * @param pVM The VM handle.
3446 * @param HCPhys The HC physical address of the shadow page.
3447 * @param iUser The shadow page pool index of the user table.
3448 * @param iUserTable The index into the user table (shadowed).
3449 */
3450void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3451{
3452 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3453 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3454 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3455}
3456
3457
3458/**
3459 * Gets a in-use page in the pool by it's physical address.
3460 *
3461 * @returns Pointer to the page.
3462 * @param pVM The VM handle.
3463 * @param HCPhys The HC physical address of the shadow page.
3464 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3465 */
3466PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3467{
3468 /** @todo profile this! */
3469 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3470 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3471 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3472 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3473 return pPage;
3474}
3475
3476
/**
 * Flushes the entire cache.
 *
 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
 * and execute this CR3 flush.
 *
 * @param   pVM     The VM handle.
 */
void pgmPoolFlushAll(PVM pVM)
{
    LogFlow(("pgmPoolFlushAll:\n"));
    /* Thin context-resolving wrapper; the real work happens in pgmPoolFlushAllInt. */
    pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
}
3490
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette