VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@1507

Last change on this file since 1507 was 58, checked in by vboxsync, 18 years ago

Removed incorrect assertion

  • Property svn:keywords set to Id
File size: 117.5 KB
1/* $Id: PGMAllPool.cpp 58 2007-01-16 12:39:56Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
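A minimal sketch of where these "big page" kinds come from: a guest PDE with the PS bit set maps a 4MB page (32-bit paging) or a 2MB page (PAE), and the pool shadows such mappings with the kinds listed above. This is not VBox code; the constants restate the architectural bit positions and the names are invented.

    #include <stdint.h>
    #include <stdbool.h>

    #define SK_PDE_P  0x01u   /* bit 0: present */
    #define SK_PDE_PS 0x80u   /* bit 7: page size - 4MB (32-bit) / 2MB (PAE) */

    /* True for a present guest PDE that maps a big page. */
    static bool skIsGuestBigPde(uint64_t uPde)
    {
        return (uPde & (SK_PDE_P | SK_PDE_PS)) == (SK_PDE_P | SK_PDE_PS);
    }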
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 return pVM->pgm.s.apGCPaePDs[0];
115 case PGMPOOL_IDX_PDPTR:
116 return pVM->pgm.s.pGCPaePDPTR;
117 case PGMPOOL_IDX_PML4:
118 return pVM->pgm.s.pGCPaePML4;
119 default:
120 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
121 return NULL;
122 }
123}
124#endif /* IN_GC */
125
126
127#ifdef PGMPOOL_WITH_MONITORING
128/**
129 * Determine the size of a write instruction.
130 * @returns number of bytes written.
131 * @param pDis The disassembler state.
132 */
133static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
134{
135 /*
136 * This is very crude and possibly wrong for some opcodes,
137 * but since it's not really supposed to be called we can
138 * probably live with that.
139 */
140 return DISGetParamSize(pDis, &pDis->param1);
141}
142
143
144/**
145 * Flushes a chain of pages sharing the same access monitor.
146 *
147 * @returns VBox status code suitable for scheduling.
148 * @param pPool The pool.
149 * @param pPage A page in the chain.
150 */
151int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
152{
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Iterate the list, flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
179 rc = VINF_PGM_SYNC_CR3;
180 }
181 /* next */
182 if (idx == NIL_PGMPOOL_IDX)
183 break;
184 pPage = &pPool->aPages[idx];
185 }
186 return rc;
187}
188
189
190/**
191 * Wrapper for getting the current context pointer to the entry being modified.
192 *
193 * @returns Pointer to the current context mapping of the entry.
194 * @param pPool The pool.
195 * @param pvFault The fault virtual address.
196 * @param GCPhysFault The fault physical address.
197 * @param cbEntry The entry size.
198 */
199#ifdef IN_RING3
200DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
201#else
202DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
203#endif
204{
205#ifdef IN_GC
206 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
207
208#elif defined(IN_RING0)
209 void *pvRet;
210 int rc = PGMRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
211 AssertFatalRCSuccess(rc);
212 return pvRet;
213
214#elif defined(IN_RING3)
215 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
216#else
217# error "huh?"
218#endif
219}
220
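The masking above simply rounds the faulting address down to the start of the table entry it hits; cbEntry must be a power of two (4 for 32-bit entries, 8 for PAE). A standalone illustration, with invented names:

    #include <stdint.h>
    #include <stdio.h>

    static uintptr_t skEntryStart(uintptr_t uAddr, unsigned cbEntry)
    {
        return uAddr & ~(uintptr_t)(cbEntry - 1);
    }

    int main(void)
    {
        /* A write hitting 0x1106 in a table of 8-byte PAE entries belongs
           to the entry starting at 0x1100. */
        printf("%#lx\n", (unsigned long)skEntryStart(0x1106, 8));
        return 0;
    }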
221
222/**
223 * Process shadow entries before they are changed by the guest.
224 *
225 * For PT entries we will clear them. For PD entries, we'll simply check
226 * for mapping conflicts and set the SyncCR3 FF if found.
227 *
228 * @param pPool The pool.
229 * @param pPage The head page.
230 * @param GCPhysFault The guest physical fault address.
231 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
232 * In R3 this is the host context 'fault' address.
233 * @param pCpu The disassembler state for figuring out the write size.
234 * This need not be specified if the caller knows we won't do cross entry accesses.
235 */
236#ifdef IN_RING3
237void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
238#else
239void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
240#endif
241{
242 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
243 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
244 for (;;)
245 {
246 union
247 {
248 void *pv;
249 PX86PT pPT;
250 PX86PTPAE pPTPae;
251 PX86PD pPD;
252 PX86PDPAE pPDPae;
253 } uShw;
254 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
255
256 switch (pPage->enmKind)
257 {
258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
259 {
260 const unsigned iShw = off / sizeof(X86PTE);
261 if (uShw.pPT->a[iShw].n.u1Present)
262 {
263# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
264 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
265 pgmPoolTracDerefGCPhysHint(pPool, pPage,
266 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
267 pGstPte->u & X86_PTE_PG_MASK);
268# endif
269 uShw.pPT->a[iShw].u = 0;
270 }
271 break;
272 }
273
274 /* page/2 sized */
275 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 if (uShw.pPTPae->a[iShw].n.u1Present)
280 {
281# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
282 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 pGstPte->u & X86_PTE_PG_MASK);
286# endif
287 uShw.pPTPae->a[iShw].u = 0;
288 }
289 }
290 break;
291
292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
293 {
294 const unsigned iShw = off / sizeof(X86PTPAE);
295 if (uShw.pPTPae->a[iShw].n.u1Present)
296 {
297# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
298 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
299 pgmPoolTracDerefGCPhysHint(pPool, pPage,
300 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
301 pGstPte->u & X86_PTE_PAE_PG_MASK);
302# endif
303 uShw.pPTPae->a[iShw].u = 0;
304 }
305 break;
306 }
307
308 case PGMPOOLKIND_ROOT_32BIT_PD:
309 {
310 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
311 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
312 {
313 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
314 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
316 }
317 /* paranoia / a bit assumptive. */
318 else if ( pCpu
319 && (off & 4)
320 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
321 {
322 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
323 if ( iShw2 != iShw
324 && iShw2 < ELEMENTS(uShw.pPD->a)
325 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
326 {
327 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
328 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
329 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
330 }
331 }
332#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
333 if ( uShw.pPD->a[iShw].n.u1Present
334 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
335 {
336 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
337# ifdef IN_GC /* TLB load - we're pushing things a bit... */
338 ASMProbeReadByte(pvAddress);
339# endif
340 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
341 uShw.pPD->a[iShw].u = 0;
342 }
343#endif
344 break;
345 }
346
347 case PGMPOOLKIND_ROOT_PAE_PD:
348 {
349 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
350 for (unsigned i = 0; i < 2; i++, iShw++)
351 {
352 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
353 {
354 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
355 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
356 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
357 }
358 /* paranoia / a bit assumptive. */
359 else if ( pCpu
360 && (off & 4)
361 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
362 {
363 const unsigned iShw2 = iShw + 2;
364 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
365 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
366 {
367 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
368 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
369 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
370 }
371 }
372#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
373 if ( uShw.pPDPae->a[iShw].n.u1Present
374 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
375 {
376 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
377# ifdef IN_GC /* TLB load - we're pushing things a bit... */
378 ASMProbeReadByte(pvAddress);
379# endif
380 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
381 uShw.pPDPae->a[iShw].u = 0;
382 }
383#endif
384 }
385 break;
386 }
387
388 default:
389 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
390 }
391
392 /* next */
393 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
394 return;
395 pPage = &pPool->aPages[pPage->iMonitoredNext];
396 }
397}
398
399
400# ifndef IN_RING3
401/**
402 * Checks if an access could be a fork operation in progress.
403 *
404 * Meaning that the guest is setting up the parent process for Copy-On-Write.
405 *
406 * @returns true if it's likely that we're forking, otherwise false.
407 * @param pPool The pool.
408 * @param pCpu The disassembled instruction.
409 * @param offFault The access offset.
410 */
411DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
412{
413 /*
414 * i386 Linux uses btr to clear X86_PTE_RW.
415 * The functions involved are (2.6.16 source inspection):
416 * clear_bit
417 * ptep_set_wrprotect
418 * copy_one_pte
419 * copy_pte_range
420 * copy_pmd_range
421 * copy_pud_range
422 * copy_page_range
423 * dup_mmap
424 * dup_mm
425 * copy_mm
426 * copy_process
427 * do_fork
428 */
429 if ( pCpu->pCurInstr->opcode == OP_BTR
430 && !(offFault & 4)
431 /** @todo Validate that the bit index is X86_PTE_RW. */
432 )
433 {
434 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
435 return true;
436 }
437 return false;
438}
439
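A sketch of the bit arithmetic behind the check above (an assumed reading, not VBox code): btr with an immediate bit index of 1 clears the R/W bit of the target PTE, and for bit indices 0..31 the memory access stays within the low dword of the entry, which is consistent with rejecting faults where (offFault & 4) is set.

    #include <stdint.h>

    #define SK_PTE_RW_BIT 1u   /* R/W is bit 1 of a PTE */

    /* What "btr [pte], 1" does to the entry in memory: clear bit 1,
       write-protecting the page. */
    static uint32_t skClearRwLikeBtr(uint32_t uPte)
    {
        return uPte & ~(1u << SK_PTE_RW_BIT);
    }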
440
441/**
442 * Determine whether the page is likely to have been reused.
443 *
444 * @returns true if we consider the page as being reused for a different purpose.
445 * @returns false if we consider it to still be a paging page.
446 * @param pPage The page in question.
447 * @param pCpu The disassembly info for the faulting instruction.
448 * @param pvFault The fault address.
449 *
450 * @remark The REP prefix check is left to the caller because of STOSD/W.
451 */
452DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
453{
454 switch (pCpu->pCurInstr->opcode)
455 {
456 case OP_PUSH:
457 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
458 return true;
459 case OP_PUSHF:
460 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
461 return true;
462 case OP_PUSHA:
463 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
464 return true;
465 case OP_FXSAVE:
466 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
467 return true;
468 }
469 if ( (pCpu->param1.flags & USE_REG_GEN32)
470 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
471 {
472 Log4(("pgmPoolMonitorIsReused: ESP\n"));
473 return true;
474 }
475
476 //if (pPage->fCR3Mix)
477 // return false;
478 return false;
479}
480
481
482/**
483 * Flushes the page being accessed.
484 *
485 * @returns VBox status code suitable for scheduling.
486 * @param pVM The VM handle.
487 * @param pPool The pool.
488 * @param pPage The pool page (head).
489 * @param pCpu The disassembly of the write instruction.
490 * @param pRegFrame The trap register frame.
491 * @param GCPhysFault The fault address as guest physical address.
492 * @param pvFault The fault address.
493 */
494static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
495 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
496{
497 /*
498 * First, do the flushing.
499 */
500 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
501
502 /*
503 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
504 */
505 uint32_t cbWritten;
506 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
507 if (VBOX_SUCCESS(rc2))
508 pRegFrame->eip += pCpu->opsize;
509 else if (rc2 == VERR_EM_INTERPRETER)
510 {
511#ifdef IN_GC
512 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
513 {
514 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04:%RGv, ignoring.\n",
515 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
516 rc = VINF_SUCCESS;
517 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
518 }
519 else
520#endif
521 {
522 rc = VINF_EM_RAW_EMULATE_INSTR;
523 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
524 }
525 }
526 else
527 rc = rc2;
528
529 /* See use in pgmPoolAccessHandlerSimple(). */
530 PGM_INVL_GUEST_TLBS();
531
532 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
533 return rc;
534
535}
536
537
538/**
539 * Handles the STOSD write accesses.
540 *
541 * @returns VBox status code suitable for scheduling.
542 * @param pVM The VM handle.
543 * @param pPool The pool.
544 * @param pPage The pool page (head).
545 * @param pCpu The disassembly of the write instruction.
546 * @param pRegFrame The trap register frame.
547 * @param GCPhysFault The fault address as guest physical address.
548 * @param pvFault The fault address.
549 */
550DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
551 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
552{
553 /*
554 * Increment the modification counter and insert it into the list
555 * of modified pages the first time.
556 */
557 if (!pPage->cModifications++)
558 pgmPoolMonitorModifiedInsert(pPool, pPage);
559
560 /*
561 * Execute REP STOSD.
562 *
563 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
564 * write situation, meaning that it's safe to write here.
565 */
566#ifdef IN_GC
567 uint32_t *pu32 = (uint32_t *)pvFault;
568#else
569 RTGCPTR pu32 = pvFault;
570#endif
571 while (pRegFrame->ecx)
572 {
573 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
574#ifdef IN_GC
575 *pu32++ = pRegFrame->eax;
576#else
577 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
578 pu32 += 4;
579#endif
580 GCPhysFault += 4;
581 pRegFrame->edi += 4;
582 pRegFrame->ecx--;
583 }
584 pRegFrame->eip += pCpu->opsize;
585
586 /* See use in pgmPoolAccessHandlerSimple(). */
587 PGM_INVL_GUEST_TLBS();
588
589 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
590 return VINF_SUCCESS;
591}
592
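For reference, the operation being emulated above boils down to the following (a sketch with invented names; the IN_GC branch writes through the fault address directly, while the other contexts go through PGMPhysWriteGCPhys):

    #include <stdint.h>

    /* REP STOSD with the direction flag clear: store uEax at pDst,
       cEcx times, advancing 4 bytes per iteration. */
    static void skRepStosd(uint32_t *pDst, uint32_t uEax, uint32_t cEcx)
    {
        while (cEcx--)
            *pDst++ = uEax;
    }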
593
594/**
595 * Handles the simple write accesses.
596 *
597 * @returns VBox status code suitable for scheduling.
598 * @param pVM The VM handle.
599 * @param pPool The pool.
600 * @param pPage The pool page (head).
601 * @param pCpu The disassembly of the write instruction.
602 * @param pRegFrame The trap register frame.
603 * @param GCPhysFault The fault address as guest physical address.
604 * @param pvFault The fault address.
605 */
606DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
607 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
608{
609 /*
610 * Increment the modification counter and insert it into the list
611 * of modified pages the first time.
612 */
613 if (!pPage->cModifications++)
614 pgmPoolMonitorModifiedInsert(pPool, pPage);
615
616 /*
617 * Clear all the pages. ASSUMES that pvFault is readable.
618 */
619 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
620
621 /*
622 * Interpret the instruction.
623 */
624 uint32_t cb;
625 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
626 if (VBOX_SUCCESS(rc))
627 pRegFrame->eip += pCpu->opsize;
628 else if (rc == VERR_EM_INTERPRETER)
629 {
630# ifdef IN_GC
631 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
632 {
633 /* We're not able to handle this in ring-3, so fix the interpreter! */
634 /** @note Should be fine. There's no need to flush the whole thing. */
635#ifndef DEBUG_sandervl
636 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
637 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
638#endif
639 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
640 rc = pgmPoolMonitorChainFlush(pPool, pPage);
641 }
642 else
643# endif
644 {
645 rc = VINF_EM_RAW_EMULATE_INSTR;
646 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
647 }
648 }
649
650 /*
651 * Quick hack: with logging enabled we get stale
652 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
653 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
654 * have to be fixed to support this. But that'll have to wait till next week.
655 *
656 * An alternative is to keep track of the changed PTEs together with the
657 * GCPhys from the guest PT. This may prove expensive though.
658 *
659 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
660 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
661 */
662 PGM_INVL_GUEST_TLBS();
663
664 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
665 return rc;
666}
667
668
669/**
670 * \#PF Handler callback for PT write accesses.
671 *
672 * @returns VBox status code (appropriate for GC return).
673 * @param pVM VM Handle.
674 * @param uErrorCode CPU Error code.
675 * @param pRegFrame Trap register frame.
676 * NULL on DMA and other non CPU access.
677 * @param pvFault The fault address (cr2).
678 * @param GCPhysFault The GC physical address corresponding to pvFault.
679 * @param pvUser User argument.
680 */
681DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
682{
683 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
684 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
685 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
686 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
687
688 /*
689 * We should ALWAYS have the list head as the user parameter. This
690 * is because we use that page to record the changes.
691 */
692 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
693
694 /*
695 * Disassemble the faulting instruction.
696 */
697 DISCPUSTATE Cpu;
698 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
699 AssertRCReturn(rc, rc);
700
701 /*
702 * Check if it's worth dealing with.
703 */
704 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
705 || pPage->fCR3Mix)
706 && !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
707 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
708 {
709 /*
710 * Simple instructions, no REP prefix.
711 */
712 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
713 {
714 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
715 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
716 return rc;
717 }
718
719 /*
720 * Windows is frequently doing small memset() operations (netio test 4k+).
721 * We have to deal with these or we'll kill the cache and performance.
722 */
723 if ( Cpu.pCurInstr->opcode == OP_STOSWD
724 && (pRegFrame->cs & X86_SEL_RPL) <= 1
725 && pRegFrame->ecx <= 0x20
726 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
727 && !((uintptr_t)pvFault & 3)
728 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
729 && Cpu.mode == CPUMODE_32BIT
730 && Cpu.opmode == CPUMODE_32BIT
731 && Cpu.addrmode == CPUMODE_32BIT
732 && Cpu.prefix == PREFIX_REP
733 && !pRegFrame->eflags.Bits.u1DF
734 )
735 {
736 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
737 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
738 return rc;
739 }
740
741 /* REP prefix, don't bother. */
742 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
743 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
744 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
745 }
746
747 /*
748 * Not worth it, so flush it.
749 */
750 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
751 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
752 return rc;
753}
754
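The STOSD fast path above is only taken when the write provably stays inside one page. A condensed sketch of those guards (constants restated, names invented):

    #include <stdint.h>
    #include <stdbool.h>

    #define SK_PAGE_SIZE        0x1000u
    #define SK_PAGE_OFFSET_MASK 0xfffu

    /* Dword-aligned, short, and fitting in the rest of the page - so the
       emulation loop can never cross a page boundary. */
    static bool skStosdStaysInPage(uintptr_t uFault, uint32_t cEcx)
    {
        return !(uFault & 3)
            && cEcx <= 0x20
            && cEcx * 4 <= SK_PAGE_SIZE - (uFault & SK_PAGE_OFFSET_MASK);
    }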
755# endif /* !IN_RING3 */
756#endif /* PGMPOOL_WITH_MONITORING */
757
758
759
760#ifdef PGMPOOL_WITH_CACHE
761/**
762 * Inserts a page into the GCPhys hash table.
763 *
764 * @param pPool The pool.
765 * @param pPage The page.
766 */
767DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
768{
769 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
770 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
771 pPage->iNext = pPool->aiHash[iHash];
772 pPool->aiHash[iHash] = pPage->idx;
773}
774
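The hash is a classic chained table, except the chain links are 16-bit page indices stored in the pages themselves, so the structure survives being mapped at different addresses in ring-3, ring-0 and GC. A self-contained sketch (shapes and hash function invented; the real PGMPOOL_HASH is defined elsewhere):

    #include <stdint.h>

    #define SK_NIL_IDX  0xffffu
    #define SK_CBUCKETS 64u              /* must be a power of two */
    #define SK_CPAGES   256u

    typedef struct SKPAGE
    {
        uint64_t GCPhys;
        uint16_t iNext;                  /* next page index in this bucket */
    } SKPAGE;

    static uint16_t g_aiHash[SK_CBUCKETS];
    static SKPAGE   g_aPages[SK_CPAGES];

    static unsigned skHash(uint64_t GCPhys)
    {
        return (unsigned)(GCPhys >> 12) & (SK_CBUCKETS - 1);
    }

    static void skHashInit(void)
    {
        for (unsigned i = 0; i < SK_CBUCKETS; i++)
            g_aiHash[i] = SK_NIL_IDX;
    }

    /* Push-front insertion, as in pgmPoolHashInsert above. */
    static void skHashInsert(uint16_t idx)
    {
        unsigned iHash = skHash(g_aPages[idx].GCPhys);
        g_aPages[idx].iNext = g_aiHash[iHash];
        g_aiHash[iHash]     = idx;
    }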
775
776/**
777 * Removes a page from the GCPhys hash table.
778 *
779 * @param pPool The pool.
780 * @param pPage The page.
781 */
782DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
783{
784 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
785 if (pPool->aiHash[iHash] == pPage->idx)
786 pPool->aiHash[iHash] = pPage->iNext;
787 else
788 {
789 uint16_t iPrev = pPool->aiHash[iHash];
790 for (;;)
791 {
792 const int16_t i = pPool->aPages[iPrev].iNext;
793 if (i == pPage->idx)
794 {
795 pPool->aPages[iPrev].iNext = pPage->iNext;
796 break;
797 }
798 if (i == NIL_PGMPOOL_IDX)
799 {
800 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
801 break;
802 }
803 iPrev = i;
804 }
805 }
806 pPage->iNext = NIL_PGMPOOL_IDX;
807}
808
809
810/**
811 * Frees up one cache page.
812 *
813 * @returns VBox status code.
814 * @retval VINF_SUCCESS on success.
815 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a lightweight pool flush.
816 * @param pPool The pool.
817 * @param iUser The user index.
818 */
819static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
820{
821 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
822 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
823
824 /*
825 * Select one page from the tail of the age list.
826 */
827 uint16_t iToFree = pPool->iAgeTail;
828 if (iToFree == iUser)
829 iToFree = pPool->aPages[iToFree].iAgePrev;
830/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
831 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
832 {
833 uint16_t i = pPool->aPages[iToFree].iAgePrev;
834 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
835 {
836 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
837 continue;
838 iToFree = i;
839 break;
840 }
841 }
842*/
843 Assert(iToFree != iUser);
844 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
845
846 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
847 if (rc == VINF_SUCCESS)
848 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
849 return rc;
850}
851
852
853/**
854 * Checks if a kind mismatch is really a page being reused
855 * or if it's just a normal remapping.
856 *
857 * @returns true if reused and the cached page (enmKind1) should be flushed
858 * @returns false if not reused.
859 * @param enmKind1 The kind of the cached page.
860 * @param enmKind2 The kind of the requested page.
861 */
862static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
863{
864 switch (enmKind1)
865 {
866 /*
867 * It's perfectly fine to reuse these, except for PAE stuff.
868 */
869 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
870 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
871 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
872 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
873 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
874 switch (enmKind2)
875 {
876 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
877 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
878 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
879 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
880 return true;
881 default:
882 return false;
883 }
884
885 /*
886 * It's perfectly fine to reuse these, except for non-PAE stuff.
887 */
888 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
889 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
890 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
891 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
892 switch (enmKind2)
893 {
894 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
895 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
896 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
897 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
898 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
899 return true;
900 default:
901 return false;
902 }
903
904 /*
905 * These cannot be flushed, and it's common to reuse the PDs as PTs.
906 */
907 case PGMPOOLKIND_ROOT_32BIT_PD:
908 case PGMPOOLKIND_ROOT_PAE_PD:
909 case PGMPOOLKIND_ROOT_PDPTR:
910 case PGMPOOLKIND_ROOT_PML4:
911 return false;
912
913 default:
914 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
915 }
916}
917
918
919/**
920 * Attempts to satisfy a pgmPoolAlloc request from the cache.
921 *
922 * @returns VBox status code.
923 * @retval VINF_PGM_CACHED_PAGE on success.
924 * @retval VERR_FILE_NOT_FOUND if not found.
925 * @param pPool The pool.
926 * @param GCPhys The GC physical address of the page we're gonna shadow.
927 * @param enmKind The kind of mapping.
928 * @param iUser The shadow page pool index of the user table.
929 * @param iUserTable The index into the user table (shadowed).
930 * @param ppPage Where to store the pointer to the page.
931 */
932static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
933{
934 /*
935 * Look up the GCPhys in the hash.
936 */
937 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
938 if (i != NIL_PGMPOOL_IDX)
939 {
940 do
941 {
942 PPGMPOOLPAGE pPage = &pPool->aPages[i];
943 if (pPage->GCPhys == GCPhys)
944 {
945 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
946 {
947 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
948 if (VBOX_SUCCESS(rc))
949 {
950 *ppPage = pPage;
951 STAM_COUNTER_INC(&pPool->StatCacheHits);
952 return VINF_PGM_CACHED_PAGE;
953 }
954 return rc;
955 }
956
957 /*
958 * The kind is different. In some cases we should now flush the page
959 * as it has been reused, but in most cases this is normal remapping
960 * of PDs as PTs or big pages using the GCPhys field in a slightly
961 * different way than the other kinds.
962 */
963 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
964 {
965 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
966 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
967 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
968 break;
969 }
970 }
971
972 /* next */
973 i = pPage->iNext;
974 } while (i != NIL_PGMPOOL_IDX);
975 }
976
977 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
978 STAM_COUNTER_INC(&pPool->StatCacheMisses);
979 return VERR_FILE_NOT_FOUND;
980}
981
982
983/**
984 * Inserts a page into the cache.
985 *
986 * @param pPool The pool.
987 * @param pPage The cached page.
988 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
989 */
990static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
991{
992 /*
993 * Insert into the GCPhys hash if the page is fit for that.
994 */
995 Assert(!pPage->fCached);
996 if (fCanBeCached)
997 {
998 pPage->fCached = true;
999 pgmPoolHashInsert(pPool, pPage);
1000 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1001 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1002 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1003 }
1004 else
1005 {
1006 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1007 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1008 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1009 }
1010
1011 /*
1012 * Insert at the head of the age list.
1013 */
1014 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1015 pPage->iAgeNext = pPool->iAgeHead;
1016 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1017 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1018 else
1019 pPool->iAgeTail = pPage->idx;
1020 pPool->iAgeHead = pPage->idx;
1021}
1022
1023
1024/**
1025 * Flushes a cached page.
1026 *
1027 * @param pPool The pool.
1028 * @param pPage The cached page.
1029 */
1030static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1031{
1032 /*
1033 * Remove the page from the hash.
1034 */
1035 if (pPage->fCached)
1036 {
1037 pPage->fCached = false;
1038 pgmPoolHashRemove(pPool, pPage);
1039 }
1040 else
1041 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1042
1043 /*
1044 * Remove it from the age list.
1045 */
1046 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1047 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1048 else
1049 pPool->iAgeTail = pPage->iAgePrev;
1050 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1051 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1052 else
1053 pPool->iAgeHead = pPage->iAgeNext;
1054 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1055 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1056}
1057#endif /* PGMPOOL_WITH_CACHE */
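The age list driving the cache is an index-linked doubly linked list: allocation inserts at the head, eviction takes the tail, and a flush unlinks from anywhere. A minimal sketch of that bookkeeping (names invented, not VBox code):

    #include <stdint.h>

    #define SK_NIL 0xffffu

    typedef struct SKNODE { uint16_t iAgeNext, iAgePrev; } SKNODE;

    static SKNODE   g_aNodes[16];
    static uint16_t g_iAgeHead = SK_NIL;
    static uint16_t g_iAgeTail = SK_NIL;

    /* Most-recently-used goes to the head (cf. pgmPoolCacheInsert). */
    static void skAgeInsertHead(uint16_t idx)
    {
        g_aNodes[idx].iAgePrev = SK_NIL;
        g_aNodes[idx].iAgeNext = g_iAgeHead;
        if (g_iAgeHead != SK_NIL)
            g_aNodes[g_iAgeHead].iAgePrev = idx;
        else
            g_iAgeTail = idx;
        g_iAgeHead = idx;
    }

    /* Unlink from anywhere in the list (cf. pgmPoolCacheFlushPage). */
    static void skAgeUnlink(uint16_t idx)
    {
        uint16_t iNext = g_aNodes[idx].iAgeNext;
        uint16_t iPrev = g_aNodes[idx].iAgePrev;
        if (iNext != SK_NIL) g_aNodes[iNext].iAgePrev = iPrev;
        else                 g_iAgeTail = iPrev;
        if (iPrev != SK_NIL) g_aNodes[iPrev].iAgeNext = iNext;
        else                 g_iAgeHead = iNext;
        g_aNodes[idx].iAgeNext = g_aNodes[idx].iAgePrev = SK_NIL;
    }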
1058
1059
1060#ifdef PGMPOOL_WITH_MONITORING
1061/**
1062 * Looks for pages sharing the monitor.
1063 *
1064 * @returns Pointer to the head page.
1065 * @returns NULL if not found.
1066 * @param pPool The Pool
1067 * @param pNewPage The page which is going to be monitored.
1068 */
1069static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1070{
1071#ifdef PGMPOOL_WITH_CACHE
1072 /*
1073 * Look up the GCPhys in the hash.
1074 */
1075 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1076 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1077 if (i == NIL_PGMPOOL_IDX)
1078 return NULL;
1079 do
1080 {
1081 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1082 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1083 && pPage != pNewPage)
1084 {
1085 switch (pPage->enmKind)
1086 {
1087 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1088 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1089 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1090 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1091 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1092 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1093 case PGMPOOLKIND_ROOT_32BIT_PD:
1094 case PGMPOOLKIND_ROOT_PAE_PD:
1095 case PGMPOOLKIND_ROOT_PDPTR:
1096 case PGMPOOLKIND_ROOT_PML4:
1097 {
1098 /* find the head */
1099 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1100 {
1101 Assert(pPage->iMonitoredPrev != pPage->idx);
1102 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1103 }
1104 return pPage;
1105 }
1106
1107 /* ignore, no monitoring. */
1108 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1109 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1110 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1111 break;
1112 default:
1113 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1114 }
1115 }
1116
1117 /* next */
1118 i = pPage->iNext;
1119 } while (i != NIL_PGMPOOL_IDX);
1120#endif
1121 return NULL;
1122}
1123
1124/**
1125 * Enables write monitoring of a guest page.
1126 *
1127 * @returns VBox status code.
1128 * @retval VINF_SUCCESS on success.
1129 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a lightweight pool flush.
1130 * @param pPool The pool.
1131 * @param pPage The cached page.
1132 */
1133static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1134{
1135 /*
1136 * Filter out the relevant kinds.
1137 */
1138 switch (pPage->enmKind)
1139 {
1140 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1141 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1142 break;
1143
1144 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1145 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1146 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1147 /* Nothing to monitor here. */
1148 return VINF_SUCCESS;
1149
1150 case PGMPOOLKIND_ROOT_32BIT_PD:
1151 case PGMPOOLKIND_ROOT_PAE_PD:
1152#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1153 break;
1154#endif
1155 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1156 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1157 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1158 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1159 case PGMPOOLKIND_ROOT_PDPTR:
1160 case PGMPOOLKIND_ROOT_PML4:
1161 default:
1162 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1163 }
1164
1165 /*
1166 * Install handler.
1167 */
1168 int rc;
1169 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1170 if (pPageHead)
1171 {
1172 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1173 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1174 pPage->iMonitoredPrev = pPageHead->idx;
1175 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1176 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1177 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1178 pPageHead->iMonitoredNext = pPage->idx;
1179 rc = VINF_SUCCESS;
1180 }
1181 else
1182 {
1183 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1184 PVM pVM = pPool->CTXSUFF(pVM);
1185 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1186 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1187 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1188 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
1189 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
1190 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
1191 pPool->pszAccessHandler);
1192 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1193 * the heap size should suffice. */
1194 AssertFatalRC(rc);
1195 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1196 rc = VERR_PGM_POOL_CLEARED;
1197 }
1198 pPage->fMonitored = true;
1199 return rc;
1200}
1201
1202
1203/**
1204 * Disables write monitoring of a guest page.
1205 *
1206 * @returns VBox status code.
1207 * @retval VINF_SUCCESS on success.
1208 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1209 * @param pPool The pool.
1210 * @param pPage The cached page.
1211 */
1212static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1213{
1214 /*
1215 * Filter out the relevant kinds.
1216 */
1217 switch (pPage->enmKind)
1218 {
1219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1220 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1221 break;
1222
1223 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1224 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1225 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1226 /* Nothing to monitor here. */
1227 return VINF_SUCCESS;
1228
1229 case PGMPOOLKIND_ROOT_32BIT_PD:
1230 case PGMPOOLKIND_ROOT_PAE_PD:
1231#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1232 break;
1233#endif
1234 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1235 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1236 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1237 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1238 case PGMPOOLKIND_ROOT_PDPTR:
1239 case PGMPOOLKIND_ROOT_PML4:
1240 default:
1241 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1242 }
1243
1244 /*
1245 * Remove the page from the monitored list or uninstall it if last.
1246 */
1247 const PVM pVM = pPool->CTXSUFF(pVM);
1248 int rc;
1249 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1250 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1251 {
1252 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1253 {
1254 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1255 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1256 pNewHead->fCR3Mix = pPage->fCR3Mix;
1257 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1258 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1259 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1260 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pNewHead),
1261 pPool->pszAccessHandler);
1262 AssertFatalRCSuccess(rc);
1263 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1264 }
1265 else
1266 {
1267 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1268 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1269 {
1270 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1271 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1272 }
1273 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1274 rc = VINF_SUCCESS;
1275 }
1276 }
1277 else
1278 {
1279 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1280 AssertFatalRC(rc);
1281 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1282 rc = VERR_PGM_POOL_CLEARED;
1283 }
1284 pPage->fMonitored = false;
1285
1286 /*
1287 * Remove it from the list of modified pages (if in it).
1288 */
1289 pgmPoolMonitorModifiedRemove(pPool, pPage);
1290
1291 return rc;
1292}
1293
1294
1295#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1296/**
1297 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1298 *
1299 * @param pPool The Pool.
1300 * @param pPage A page in the chain.
1301 * @param fCR3Mix The new fCR3Mix value.
1302 */
1303static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1304{
1305 /* current */
1306 pPage->fCR3Mix = fCR3Mix;
1307
1308 /* before */
1309 int16_t idx = pPage->iMonitoredPrev;
1310 while (idx != NIL_PGMPOOL_IDX)
1311 {
1312 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1313 idx = pPool->aPages[idx].iMonitoredPrev;
1314 }
1315
1316 /* after */
1317 idx = pPage->iMonitoredNext;
1318 while (idx != NIL_PGMPOOL_IDX)
1319 {
1320 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1321 idx = pPool->aPages[idx].iMonitoredNext;
1322 }
1323}
1324
1325
1326/**
1327 * Installs or modifies monitoring of a CR3 page (special).
1328 *
1329 * We're pretending the CR3 page is shadowed by the pool so we can use the
1330 * generic mechanisms for detecting chained monitoring. (This also gives us a
1331 * taste of what code changes are required to really pool CR3 shadow pages.)
1332 *
1333 * @returns VBox status code.
1334 * @param pPool The pool.
1335 * @param idxRoot The CR3 (root) page index.
1336 * @param GCPhysCR3 The (new) CR3 value.
1337 */
1338int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1339{
1340 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1341 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1342 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1343 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1344
1345 /*
1346 * The unlikely case where it already matches.
1347 */
1348 if (pPage->GCPhys == GCPhysCR3)
1349 {
1350 Assert(pPage->fMonitored);
1351 return VINF_SUCCESS;
1352 }
1353
1354 /*
1355 * Flush the current monitoring and remove it from the hash.
1356 */
1357 int rc = VINF_SUCCESS;
1358 if (pPage->fMonitored)
1359 {
1360 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1361 rc = pgmPoolMonitorFlush(pPool, pPage);
1362 if (rc == VERR_PGM_POOL_CLEARED)
1363 rc = VINF_SUCCESS;
1364 else
1365 AssertFatalRC(rc);
1366 pgmPoolHashRemove(pPool, pPage);
1367 }
1368
1369 /*
1370 * Monitor the page at the new location and insert it into the hash.
1371 */
1372 pPage->GCPhys = GCPhysCR3;
1373 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1374 if (rc2 != VERR_PGM_POOL_CLEARED)
1375 {
1376 AssertFatalRC(rc2);
1377 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1378 rc = rc2;
1379 }
1380 pgmPoolHashInsert(pPool, pPage);
1381 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1382 return rc;
1383}
1384
1385
1386/**
1387 * Removes the monitoring of a CR3 page (special).
1388 *
1389 * @returns VBox status code.
1390 * @param pPool The pool.
1391 * @param idxRoot The CR3 (root) page index.
1392 */
1393int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1394{
1395 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1396 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1397 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1398 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1399
1400 if (!pPage->fMonitored)
1401 return VINF_SUCCESS;
1402
1403 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1404 int rc = pgmPoolMonitorFlush(pPool, pPage);
1405 if (rc != VERR_PGM_POOL_CLEARED)
1406 AssertFatalRC(rc);
1407 else
1408 rc = VINF_SUCCESS;
1409 pgmPoolHashRemove(pPool, pPage);
1410 Assert(!pPage->fMonitored);
1411 pPage->GCPhys = NIL_RTGCPHYS;
1412 return rc;
1413}
1414#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1415
1416
1417/**
1418 * Inserts the page into the list of modified pages.
1419 *
1420 * @param pPool The pool.
1421 * @param pPage The page.
1422 */
1423void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1424{
1425 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1426 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1427 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1428 && pPool->iModifiedHead != pPage->idx,
1429 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1430 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1431 pPool->iModifiedHead, pPool->cModifiedPages));
1432
1433 pPage->iModifiedNext = pPool->iModifiedHead;
1434 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1435 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1436 pPool->iModifiedHead = pPage->idx;
1437 pPool->cModifiedPages++;
1438#ifdef VBOX_WITH_STATISTICS
1439 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1440 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1441#endif
1442}
1443
1444
1445/**
1446 * Removes the page from the list of modified pages and resets the
1447 * modification counter.
1448 *
1449 * @param pPool The pool.
1450 * @param pPage The page which is believed to be in the list of modified pages.
1451 */
1452static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1453{
1454 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1455 if (pPool->iModifiedHead == pPage->idx)
1456 {
1457 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1458 pPool->iModifiedHead = pPage->iModifiedNext;
1459 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1460 {
1461 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1462 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1463 }
1464 pPool->cModifiedPages--;
1465 }
1466 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1467 {
1468 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1469 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1470 {
1471 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1472 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1473 }
1474 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1475 pPool->cModifiedPages--;
1476 }
1477 else
1478 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1479 pPage->cModifications = 0;
1480}
1481
1482
1483/**
1484 * Zaps the list of modified pages, resetting their modification counters in the process.
1485 *
1486 * @param pVM The VM handle.
1487 */
1488void pgmPoolMonitorModifiedClearAll(PVM pVM)
1489{
1490 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1491 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1492
1493 unsigned cPages = 0; NOREF(cPages);
1494 uint16_t idx = pPool->iModifiedHead;
1495 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1496 while (idx != NIL_PGMPOOL_IDX)
1497 {
1498 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1499 idx = pPage->iModifiedNext;
1500 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1501 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1502 pPage->cModifications = 0;
1503 Assert(++cPages);
1504 }
1505 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1506 pPool->cModifiedPages = 0;
1507}
1508
1509
1510/**
1511 * Clear all shadow pages and clear all modification counters.
1512 *
1513 * @param pVM The VM handle.
1514 * @remark Should only be used when monitoring is available, thus placed in
1515 * the PGMPOOL_WITH_MONITORING #ifdef.
1516 */
1517void pgmPoolClearAll(PVM pVM)
1518{
1519 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1520 STAM_PROFILE_START(&pPool->StatClearAll, c);
1521 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1522
1523 /*
1524 * Iterate all the pages until we've encountered all that are in use.
1525 * This is a simple but not quite optimal solution.
1526 */
1527 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1528 unsigned cLeft = pPool->cUsedPages;
1529 unsigned iPage = pPool->cCurPages;
1530 while (--iPage >= PGMPOOL_IDX_FIRST)
1531 {
1532 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1533 if (pPage->GCPhys != NIL_RTGCPHYS)
1534 {
1535 switch (pPage->enmKind)
1536 {
1537 /*
1538 * We only care about shadow page tables.
1539 */
1540 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1541 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1542 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1544 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1545 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1546 {
1547#ifdef PGMPOOL_WITH_USER_TRACKING
1548 if (pPage->cPresent)
1549#endif
1550 {
1551 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1552 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1553 ASMMemZeroPage(pvShw);
1554 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1555#ifdef PGMPOOL_WITH_USER_TRACKING
1556 pPage->cPresent = 0;
1557 pPage->iFirstPresent = ~0;
1558#endif
1559 }
1560 }
1561 /* fall thru */
1562
1563 default:
1564 Assert(!pPage->cModifications || ++cModifiedPages);
1565 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1566 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1567 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1568 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1569 pPage->cModifications = 0;
1570 break;
1571
1572 }
1573 if (!--cLeft)
1574 break;
1575 }
1576 }
1577
1578 /* sweep the special pages too. */
1579 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1580 {
1581 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1582 if (pPage->GCPhys != NIL_RTGCPHYS)
1583 {
1584 Assert(!pPage->cModifications || ++cModifiedPages);
1585 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1586 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1587 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1588 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1589 pPage->cModifications = 0;
1590 }
1591 }
1592
1593 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1594 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1595 pPool->cModifiedPages = 0;
1596
1597#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1598 /*
1599 * Clear all the GCPhys links and rebuild the phys ext free list.
1600 */
1601 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1602 pRam;
1603 pRam = pRam->CTXSUFF(pNext))
1604 {
1605 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1606 while (iPage-- > 0)
1607 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
1608 }
1609
1610 pPool->iPhysExtFreeHead = 0;
1611 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1612 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1613 for (unsigned i = 0; i < cMaxPhysExts; i++)
1614 {
1615 paPhysExts[i].iNext = i + 1;
1616 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1617 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1618 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1619 }
1620 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1621#endif
1622
1623
1624 pPool->cPresent = 0;
1625 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1626}
1627#endif /* PGMPOOL_WITH_MONITORING */
1628
1629
1630#ifdef PGMPOOL_WITH_USER_TRACKING
1631/**
1632 * Frees up at least one user entry.
1633 *
1634 * @returns VBox status code.
1635 * @retval VINF_SUCCESS if successfully added.
1636 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1637 * @param pPool The pool.
1638 * @param iUser The user index.
1639 */
1640static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1641{
1642 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1643#ifdef PGMPOOL_WITH_CACHE
1644 /*
1645 * Just free cached pages in a braindead fashion.
1646 */
1647 /** @todo walk the age list backwards and free the first with usage. */
1648 int rc = VINF_SUCCESS;
1649 do
1650 {
1651 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1652 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1653 rc = rc2;
1654 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1655 return rc;
1656#else
1657 /*
1658 * Lazy approach.
1659 */
1660 pgmPoolFlushAllInt(pPool);
1661 return VERR_PGM_POOL_FLUSHED;
1662#endif
1663}
1664
1665
1666/**
1667 * Inserts a page into the cache.
1668 *
1669 * This will create a user node for the page, insert it into the GCPhys
1670 * hash, and insert it into the age list.
1671 *
1672 * @returns VBox status code.
1673 * @retval VINF_SUCCESS if successfully added.
1674 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1675 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
1676 * @param pPool The pool.
1677 * @param pPage The cached page.
1678 * @param GCPhys The GC physical address of the page we're gonna shadow.
1679 * @param iUser The user index.
1680 * @param iUserTable The user table index.
1681 */
1682DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1683{
1684 int rc = VINF_SUCCESS;
1685 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1686
1687 /*
1688 * Find a free user node.
1689 */
1690 uint16_t i = pPool->iUserFreeHead;
1691 if (i == NIL_PGMPOOL_USER_INDEX)
1692 {
1693 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1694 if (VBOX_FAILURE(rc))
1695 return rc;
1696 i = pPool->iUserFreeHead;
1697 }
1698
1699 /*
1700 * Unlink the user node from the free list,
1701 * initialize and insert it into the user list.
1702 */
1703 pPool->iUserFreeHead = pUser[i].iNext;
1704 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1705 pUser[i].iUser = iUser;
1706 pUser[i].iUserTable = iUserTable;
1707 pPage->iUserHead = i;
1708
1709 /*
1710 * Insert into cache and enable monitoring of the guest page if enabled.
1711 *
1712 * Until we implement caching of all levels, including the CR3 one, we'll
1713 * have to make sure we don't try to monitor & cache any recursive reuse of
1714 * a monitored CR3 page. Because all Windows versions are doing this we'll
1715 * have to be able to do combined access monitoring, CR3 + PT and
1716 * PD + PT (guest PAE).
1717 *
1718 * Update:
1719 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1720 */
1721#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1722# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1723 const bool fCanBeMonitored = true;
1724# else
1725 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1726 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1727 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1728# endif
1729# ifdef PGMPOOL_WITH_CACHE
1730 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1731# endif
1732 if (fCanBeMonitored)
1733 {
1734# ifdef PGMPOOL_WITH_MONITORING
1735 rc = pgmPoolMonitorInsert(pPool, pPage);
1736 if (rc == VERR_PGM_POOL_CLEARED)
1737 {
1738 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1739# ifndef PGMPOOL_WITH_CACHE
1740 pgmPoolMonitorFlush(pPool, pPage);
1741 rc = VERR_PGM_POOL_FLUSHED;
1742# endif
1743 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1744 pUser[i].iNext = pPool->iUserFreeHead;
1745 pUser[i].iUser = NIL_PGMPOOL_IDX;
1746 pPool->iUserFreeHead = i;
1747 }
1748 }
1749# endif
1750#endif /* PGMPOOL_WITH_MONITORING */
1751 return rc;
1752}
1753
1754
1755# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1756/**
1757 * Adds a user reference to a page.
1758 *
1759 * This will move the page to the head of the age list,
1760 * updating the cache replacement statistics.
1761 *
1762 * @returns VBox status code.
1763 * @retval VINF_SUCCESS if successfully added.
1764 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1765 * @param pPool The pool.
1766 * @param pPage The cached page.
1767 * @param iUser The user index.
1768 * @param iUserTable The user table.
1769 */
1770static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1771{
1772 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1773
1774# ifdef VBOX_STRICT
1775 /*
1776 * Check that the entry doesn't already exist.
1777 */
1778 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1779 {
1780 uint16_t i = pPage->iUserHead;
1781 do
1782 {
1783 Assert(i < pPool->cMaxUsers);
1784 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1785 i = paUsers[i].iNext;
1786 } while (i != NIL_PGMPOOL_USER_INDEX);
1787 }
1788# endif
1789
1790 /*
1791 * Allocate a user node.
1792 */
1793 uint16_t i = pPool->iUserFreeHead;
1794 if (i == NIL_PGMPOOL_USER_INDEX)
1795 {
1796 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1797 if (VBOX_FAILURE(rc))
1798 return rc;
1799 i = pPool->iUserFreeHead;
1800 }
1801 pPool->iUserFreeHead = paUsers[i].iNext;
1802
1803 /*
1804 * Initialize the user node and insert it.
1805 */
1806 paUsers[i].iNext = pPage->iUserHead;
1807 paUsers[i].iUser = iUser;
1808 paUsers[i].iUserTable = iUserTable;
1809 pPage->iUserHead = i;
1810
1811# ifdef PGMPOOL_WITH_CACHE
1812 /*
1813 * Tell the cache to update its replacement stats for this page.
1814 */
1815 pgmPoolCacheUsed(pPool, pPage);
1816# endif
1817 return VINF_SUCCESS;
1818}
1819# endif /* PGMPOOL_WITH_CACHE */
1820
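/*
 * Illustrative sketch (not part of the original source): the kind of
 * move-to-front that updating the replacement stats via pgmPoolCacheUsed()
 * amounts to. The age list is a doubly linked list of indices; keeping
 * recently used pages at the head lets eviction take the tail (LRU). The
 * structure and names below are hypothetical.
 */
#if 0
#include <stdint.h>

#define EX_NIL UINT16_MAX

typedef struct ExamplePage { uint16_t iAgeNext, iAgePrev; } ExamplePage;

static void exAgeListUse(ExamplePage *paPages, uint16_t *piHead, uint16_t *piTail, uint16_t i)
{
    if (*piHead == i)
        return;                         /* already the most recently used */

    /* unlink the page */
    if (paPages[i].iAgePrev != EX_NIL)
        paPages[paPages[i].iAgePrev].iAgeNext = paPages[i].iAgeNext;
    if (paPages[i].iAgeNext != EX_NIL)
        paPages[paPages[i].iAgeNext].iAgePrev = paPages[i].iAgePrev;
    else
        *piTail = paPages[i].iAgePrev;  /* it was the tail */

    /* relink it at the head */
    paPages[i].iAgePrev = EX_NIL;
    paPages[i].iAgeNext = *piHead;
    if (*piHead != EX_NIL)
        paPages[*piHead].iAgePrev = i;
    *piHead = i;
}
#endif
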
1821
1822/**
1823 * Frees a user record associated with a page.
1824 *
1825 * This does not clear the entry in the user table; it simply returns the
1826 * user record to the chain of free records.
1827 *
1828 * @param pPool The pool.
1829 * @param pPage The shadow page whose user record is being freed.
1830 * @param iUser The shadow page pool index of the user table.
1831 * @param iUserTable The index into the user table (shadowed).
1832 */
1833static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1834{
1835 /*
1836 * Unlink and free the specified user entry.
1837 */
1838 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1839
1840 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
1841 uint16_t i = pPage->iUserHead;
1842 if ( i != NIL_PGMPOOL_USER_INDEX
1843 && paUsers[i].iUser == iUser
1844 && paUsers[i].iUserTable == iUserTable)
1845 {
1846 pPage->iUserHead = paUsers[i].iNext;
1847
1848 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1849 paUsers[i].iNext = pPool->iUserFreeHead;
1850 pPool->iUserFreeHead = i;
1851 return;
1852 }
1853
1854 /* General: Linear search. */
1855 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1856 while (i != NIL_PGMPOOL_USER_INDEX)
1857 {
1858 if ( paUsers[i].iUser == iUser
1859 && paUsers[i].iUserTable == iUserTable)
1860 {
1861 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1862 paUsers[iPrev].iNext = paUsers[i].iNext;
1863 else
1864 pPage->iUserHead = paUsers[i].iNext;
1865
1866 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1867 paUsers[i].iNext = pPool->iUserFreeHead;
1868 pPool->iUserFreeHead = i;
1869 return;
1870 }
1871 iPrev = i;
1872 i = paUsers[i].iNext;
1873 }
1874
1875 /* Fatal: didn't find it */
1876 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1877 iUser, iUserTable, pPage->GCPhys));
1878}
1879
1880
1881/**
1882 * Gets the entry size of a shadow table.
1883 *
1884 * @param enmKind
1885 * The kind of page.
1886 *
1887 * @returns The size of the entry in bytes. That is, 4 or 8.
1888 * If the kind is not for a table, a fatal assertion is raised.
1890 */
1891DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1892{
1893 switch (enmKind)
1894 {
1895 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1896 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1897 case PGMPOOLKIND_ROOT_32BIT_PD:
1898 return 4;
1899
1900 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1901 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1902 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1903 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1904 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1905 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1906 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1907 case PGMPOOLKIND_ROOT_PAE_PD:
1908 case PGMPOOLKIND_ROOT_PDPTR:
1909 case PGMPOOLKIND_ROOT_PML4:
1910 return 8;
1911
1912 default:
1913 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1914 }
1915}
1916
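/*
 * Illustrative sketch (not part of the original source): the entry size
 * returned above fixes how many entries fit in one table page:
 * 4096/4 = 1024 for 32-bit tables, 4096/8 = 512 for PAE/long-mode tables.
 */
#if 0
#include <assert.h>

#define EX_PAGE_SIZE 4096u

static unsigned exEntriesPerTable(unsigned cbEntry)
{
    assert(cbEntry == 4 || cbEntry == 8);
    return EX_PAGE_SIZE / cbEntry;      /* 1024 or 512 */
}
#endif
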
1917
1918/**
1919 * Gets the entry size of a guest table.
1920 *
1921 * @param enmKind
1922 * The kind of page.
1923 *
1924 * @returns The size of the entry in bytes. That is, 4 or 8.
1925 * If the kind is not for a table, a fatal assertion is raised.
1927 */
1928DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1929{
1930 switch (enmKind)
1931 {
1932 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1933 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1934 case PGMPOOLKIND_ROOT_32BIT_PD:
1935 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1938 return 4;
1939
1940 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1941 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1942 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1943 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1944 case PGMPOOLKIND_ROOT_PAE_PD:
1945 case PGMPOOLKIND_ROOT_PDPTR:
1946 case PGMPOOLKIND_ROOT_PML4:
1947 return 8;
1948
1949 default:
1950 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1951 }
1952}
1953
1954
1955#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1956/**
1957 * Worker that scans one shadow page table for mappings of a physical page and clears them.
1958 *
1959 * @param pVM The VM handle.
1960 * @param pHCPhys The aHCPhys ramrange entry in question.
1961 * @param iShw The shadow page table.
1962 * @param cRefs The number of references made in that PT.
1963 */
1964static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
1965{
1966 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
1967 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1968
1969 /*
1970 * Assert sanity.
1971 */
1972 Assert(cRefs == 1);
1973 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
1974 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
1975
1976 /*
1977 * Then, clear the actual mappings to the page in the shadow PT.
1978 */
1979 switch (pPage->enmKind)
1980 {
1981 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1982 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1983 {
1984 const uint32_t u32 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
1985 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
1986 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
1987 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
1988 {
1989 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
1990 pPT->a[i].u = 0;
1991 cRefs--;
1992 if (!cRefs)
1993 return;
1994 }
1995#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
1996 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
1997 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
1998 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
1999 {
2000 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2001 pPT->a[i].u = 0;
2002 }
2003#endif
2004 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2005 break;
2006 }
2007
2008 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2009 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2010 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2011 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2012 {
2013 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2014 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2015 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2016 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2017 {
2018 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2019 pPT->a[i].u = 0;
2020 cRefs--;
2021 if (!cRefs)
2022 return;
2023 }
2024#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2025 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2026 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2027 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2028 {
2029 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2030 pPT->a[i].u = 0;
2031 }
2032#endif
2033 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2034 break;
2035 }
2036
2037 default:
2038 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2039 }
2040}
2041
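/*
 * Illustrative sketch (not part of the original source): the PTE comparison
 * the flushing loops above rely on. Masking with (page-frame mask | P) and
 * comparing against (HCPhys | P) matches exactly the present entries that
 * point at the page being flushed, while ignoring attribute bits such as
 * RW/US/A/D. The constants are simplified stand-ins for the X86_PTE_* ones.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define EX_PTE_P           0x1ULL
#define EX_PTE_PAE_PG_MASK 0x000ffffffffff000ULL

static bool exPteMapsPage(uint64_t uPte, uint64_t HCPhysPage)
{
    const uint64_t uMatch = (HCPhysPage & EX_PTE_PAE_PG_MASK) | EX_PTE_P;
    return (uPte & (EX_PTE_PAE_PG_MASK | EX_PTE_P)) == uMatch;
}
#endif
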
2042
2043/**
2044 * Scans one shadow page table for mappings of a physical page.
2045 *
2046 * @param pVM The VM handle.
2047 * @param pHCPhys The aHCPhys ramrange entry in question.
2048 * @param iShw The shadow page table.
2049 * @param cRefs The number of references made in that PT.
2050 */
2051void pgmPoolTrackFlushGCPhysPT(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
2052{
2053 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2054 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
2055 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2056 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, iShw, cRefs);
2057 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2058 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2059}
2060
2061
2062/**
2063 * Flushes a list of shadow page tables mapping the same physical page.
2064 *
2065 * @param pVM The VM handle.
2066 * @param pHCPhys The aHCPhys ramrange entry in question.
2067 * @param iPhysExt The physical cross reference extent list to flush.
2068 */
2069void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iPhysExt)
2070{
2071 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2072 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2073 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pHCPhys=%p:{%RHp} iPhysExt=%d\n", pHCPhys, *pHCPhys, iPhysExt));
2074
2075 const uint16_t iPhysExtStart = iPhysExt;
2076 PPGMPOOLPHYSEXT pPhysExt;
2077 do
2078 {
2079 Assert(iPhysExt < pPool->cMaxPhysExts);
2080 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2081 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2082 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2083 {
2084 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, pPhysExt->aidx[i], 1);
2085 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2086 }
2087
2088 /* next */
2089 iPhysExt = pPhysExt->iNext;
2090 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2091
2092 /* insert the list into the free list and clear the ram range entry. */
2093 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2094 pPool->iPhysExtFreeHead = iPhysExtStart;
2095 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2096
2097 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2098}
2099#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2100
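/*
 * Illustrative sketch (not part of the original source): how a physical
 * cross reference extent chain like the one pgmPoolTrackFlushGCPhysPTs()
 * walks above is traversed. Each extent holds a handful of shadow page
 * indices plus an iNext link; the concrete shape below is hypothetical.
 */
#if 0
#include <stdint.h>

#define EX_NIL         UINT16_MAX
#define EX_EXT_ENTRIES 3

typedef struct ExamplePhysExt
{
    uint16_t iNext;                     /* next extent in the chain or EX_NIL */
    uint16_t aidx[EX_EXT_ENTRIES];      /* shadow page table indices */
} ExamplePhysExt;

static void exWalkPhysExtChain(ExamplePhysExt *paExts, uint16_t iHead,
                               void (*pfnVisit)(uint16_t iShw))
{
    for (uint16_t i = iHead; i != EX_NIL; i = paExts[i].iNext)
        for (unsigned j = 0; j < EX_EXT_ENTRIES; j++)
            if (paExts[i].aidx[j] != EX_NIL)
                pfnVisit(paExts[i].aidx[j]);
}
#endif
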
2101
2102/**
2103 * Scans all shadow page tables for mappings of a physical page.
2104 *
2105 * This may be slow, but it's most likely more efficient than cleaning
2106 * out the entire page pool / cache.
2107 *
2108 * @returns VBox status code.
2109 * @retval VINF_SUCCESS if all references have been successfully cleared.
2110 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2111 * a page pool cleaning.
2112 *
2113 * @param pVM The VM handle.
2114 * @param pHCPhys The aHCPhys ramrange entry in question.
2115 */
2116int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PRTHCPHYS pHCPhys)
2117{
2118 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2119 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2120 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d *pHCPhys=%RHp\n",
2121 pPool->cUsedPages, pPool->cPresent, *pHCPhys));
2122
2123#if 1
2124 /*
2125 * There is a limit to what makes sense.
2126 */
2127 if (pPool->cPresent > 1024)
2128 {
2129 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2130 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2131 return VINF_PGM_GCPHYS_ALIASED;
2132 }
2133#endif
2134
2135 /*
2136 * Iterate all the pages until we've encountered all those in use.
2137 * This is a simple but not quite optimal solution.
2138 */
2139 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2140 const uint32_t u32 = u64;
2141 unsigned cLeft = pPool->cUsedPages;
2142 unsigned iPage = pPool->cCurPages;
2143 while (--iPage >= PGMPOOL_IDX_FIRST)
2144 {
2145 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2146 if (pPage->GCPhys != NIL_RTGCPHYS)
2147 {
2148 switch (pPage->enmKind)
2149 {
2150 /*
2151 * We only care about shadow page tables.
2152 */
2153 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2154 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2155 {
2156 unsigned cPresent = pPage->cPresent;
2157 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2158 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2159 if (pPT->a[i].n.u1Present)
2160 {
2161 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2162 {
2163 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2164 pPT->a[i].u = 0;
2165 }
2166 if (!--cPresent)
2167 break;
2168 }
2169 break;
2170 }
2171
2172 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2173 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2174 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2175 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2176 {
2177 unsigned cPresent = pPage->cPresent;
2178 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2179 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2180 if (pPT->a[i].n.u1Present)
2181 {
2182 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2183 {
2184 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2185 pPT->a[i].u = 0;
2186 }
2187 if (!--cPresent)
2188 break;
2189 }
2190 break;
2191 }
2192 }
2193 if (!--cLeft)
2194 break;
2195 }
2196 }
2197
2198 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2199 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2200 return VINF_SUCCESS;
2201}
2202
2203
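/*
 * Illustrative sketch (not part of the original source): the double early
 * exit used by the slow path above. An outer counter stops the walk once as
 * many pages as are in use have been seen (the real code only counts pages
 * whose GCPhys is set), and each page scan is itself bounded by that page's
 * cPresent count. Names below are hypothetical.
 */
#if 0
static void exBoundedScan(unsigned cUsedPages, unsigned cCurPages,
                          void (*pfnScanPage)(unsigned iPage))
{
    unsigned cLeft = cUsedPages;
    unsigned iPage = cCurPages;
    while (iPage-- > 0)
    {
        pfnScanPage(iPage);             /* bounded by the page's own cPresent */
        if (!--cLeft)
            break;                      /* every in-use page has been visited */
    }
}
#endif
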
2204/**
2205 * Clears the user entry in a user table.
2206 *
2207 * This is used to remove all references to a page when flushing it.
2208 */
2209static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2210{
2211 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2212 Assert(pUser->iUser < pPool->cCurPages);
2213
2214 /*
2215 * Map the user page.
2216 */
2217 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2218 union
2219 {
2220 uint64_t *pau64;
2221 uint32_t *pau32;
2222 } u;
2223 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2224
2225#ifdef VBOX_STRICT
2226 /*
2227 * Some sanity checks.
2228 */
2229 switch (pUserPage->enmKind)
2230 {
2231 case PGMPOOLKIND_ROOT_32BIT_PD:
2232 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2233 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2234 break;
2235 case PGMPOOLKIND_ROOT_PAE_PD:
2236 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2237 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2238 break;
2239 case PGMPOOLKIND_ROOT_PDPTR:
2240 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2241 Assert(pUser->iUserTable < 4);
2242 break;
2243 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2244 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2245 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2246 break;
2247 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2248 case PGMPOOLKIND_ROOT_PML4:
2249 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2250 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2251 break;
2252 default:
2253 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2254 break;
2255 }
2256#endif /* VBOX_STRICT */
2257
2258 /*
2259 * Clear the entry in the user page.
2260 */
2261 switch (pUserPage->enmKind)
2262 {
2263 /* 32-bit entries */
2264 case PGMPOOLKIND_ROOT_32BIT_PD:
2265 u.pau32[pUser->iUserTable] = 0;
2266 break;
2267
2268 /* 64-bit entries */
2269 case PGMPOOLKIND_ROOT_PAE_PD:
2270 case PGMPOOLKIND_ROOT_PDPTR:
2271 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2273 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2274 case PGMPOOLKIND_ROOT_PML4:
2275 u.pau64[pUser->iUserTable] = 0;
2276 break;
2277
2278 default:
2279 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2280 }
2281}
2282
2283
2284/**
2285 * Clears all users of a page.
2286 */
2287static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2288{
2289 /*
2290 * Free all the user records.
2291 */
2292 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2293 uint16_t i = pPage->iUserHead;
2294 while (i != NIL_PGMPOOL_USER_INDEX)
2295 {
2296 /* Clear the entry in the user table. */
2297 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2298
2299 /* Free it. */
2300 const uint16_t iNext = paUsers[i].iNext;
2301 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2302 paUsers[i].iNext = pPool->iUserFreeHead;
2303 pPool->iUserFreeHead = i;
2304
2305 /* Next. */
2306 i = iNext;
2307 }
2308 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2309}
2310
2311
2312#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2313/**
2314 * Allocates a new physical cross reference extent.
2315 *
2316 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2317 * @param pVM The VM handle.
2318 * @param piPhysExt Where to store the phys ext index.
2319 */
2320PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2321{
2322 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2323 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2324 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2325 {
2326 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2327 return NULL;
2328 }
2329 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2330 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2331 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2332 *piPhysExt = iPhysExt;
2333 return pPhysExt;
2334}
2335
2336
2337/**
2338 * Frees a physical cross reference extent.
2339 *
2340 * @param pVM The VM handle.
2341 * @param iPhysExt The extent to free.
2342 */
2343void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2344{
2345 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2346 Assert(iPhysExt < pPool->cMaxPhysExts);
2347 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2348 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2349 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2350 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2351 pPool->iPhysExtFreeHead = iPhysExt;
2352}
2353
2354
2355/**
2356 * Frees a list of physical cross reference extents.
2357 *
2358 * @param pVM The VM handle.
2359 * @param iPhysExt The index of the head of the extent list to free.
2360 */
2361void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2362{
2363 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2364
2365 const uint16_t iPhysExtStart = iPhysExt;
2366 PPGMPOOLPHYSEXT pPhysExt;
2367 do
2368 {
2369 Assert(iPhysExt < pPool->cMaxPhysExts);
2370 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2371 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2372 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2373
2374 /* next */
2375 iPhysExt = pPhysExt->iNext;
2376 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2377
2378 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2379 pPool->iPhysExtFreeHead = iPhysExtStart;
2380}
2381
2382/**
2383 * Insert a reference into a list of physical cross reference extents.
2384 *
2385 * @returns The new ram range flags (top 16-bits).
2386 *
2387 * @param pVM The VM handle.
2388 * @param iPhysExt The physical extent index of the list head.
2389 * @param iShwPT The shadow page table index.
2390 *
2391 */
2392static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2393{
2394 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2395 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2396
2397 /* special common case. */
2398 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2399 {
2400 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2401 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2402 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2403 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2404 }
2405
2406 /* general treatment. */
2407 const uint16_t iPhysExtStart = iPhysExt;
2408 unsigned cMax = 15;
2409 for (;;)
2410 {
2411 Assert(iPhysExt < pPool->cMaxPhysExts);
2412 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2413 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2414 {
2415 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2416 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2417 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2418 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2419 }
2420 if (!--cMax)
2421 {
2422 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2423 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2424 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2425 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2426 }

/* advance to the next extent in the list */
iPhysExt = paPhysExts[iPhysExt].iNext;
if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
    break;
2427 }
2428
2429 /* add another extent to the list. */
2430 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2431 if (!pNew)
2432 {
2433 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2434 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2435 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2436 }
2437 pNew->iNext = iPhysExtStart;
2438 pNew->aidx[0] = iShwPT;
2439 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2440 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2441}
2442
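/*
 * Illustrative sketch (not part of the original source): the 16-bit
 * tracking word the functions above and below return packs a reference
 * count (or the PHYSEXT marker) into the top bits and an index into the low
 * bits: a shadow page index, or the head of a phys ext list once the marker
 * is set. The field widths are hypothetical stand-ins for the MM_RAM_FLAGS_*
 * constants.
 */
#if 0
#include <stdint.h>

#define EX_IDX_BITS      12
#define EX_IDX_MASK      ((1u << EX_IDX_BITS) - 1)
#define EX_CREFS_PHYSEXT 0xfu           /* marker: low bits hold a phys ext index */

static uint16_t exPack(uint16_t cRefs, uint16_t idx)
{
    return (uint16_t)((cRefs << EX_IDX_BITS) | (idx & EX_IDX_MASK));
}

static uint16_t exCRefs(uint16_t u16) { return (uint16_t)(u16 >> EX_IDX_BITS); }
static uint16_t exIndex(uint16_t u16) { return (uint16_t)(u16 & EX_IDX_MASK); }
#endif
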
2443
2444/**
2445 * Adds a reference to a guest physical page where extents are in use.
2446 *
2447 * @returns The new ram range flags (top 16-bits).
2448 *
2449 * @param pVM The VM handle.
2450 * @param u16 The ram range flags (top 16-bits).
2451 * @param iShwPT The shadow page table index.
2452 */
2453uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2454{
2455 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2456 {
2457 /*
2458 * Convert to extent list.
2459 */
2460 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2461 uint16_t iPhysExt;
2462 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2463 if (pPhysExt)
2464 {
2465 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2466 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2467 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2468 pPhysExt->aidx[1] = iShwPT;
2469 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2470 }
2471 else
2472 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2473 }
2474 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2475 {
2476 /*
2477 * Insert into the extent list.
2478 */
2479 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2480 }
2481 else
2482 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2483 return u16;
2484}
2485
2486
2487/**
2488 * Clear references to guest physical memory.
2489 *
2490 * @param pPool The pool.
2491 * @param pPage The page.
2492 * @param pHCPhys Pointer to the aHCPhys entry in the ram range.
2493 */
2494void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PRTHCPHYS pHCPhys)
2495{
2496 const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
2497 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2498
2499 uint16_t iPhysExt = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2500 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2501 {
2502 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2503 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2504 do
2505 {
2506 Assert(iPhysExt < pPool->cMaxPhysExts);
2507
2508 /*
2509 * Look for the shadow page and check if it's all freed.
2510 */
2511 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2512 {
2513 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2514 {
2515 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2516
2517 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2518 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2519 {
2520 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2521 return;
2522 }
2523
2524 /* we can free the node. */
2525 PVM pVM = pPool->CTXSUFF(pVM);
2526 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2527 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2528 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2529 {
2530 /* lonely node */
2531 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2532 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d lonely\n", *pHCPhys, pPage->idx));
2533 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2534 }
2535 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2536 {
2537 /* head */
2538 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d head\n", *pHCPhys, pPage->idx));
2539 *pHCPhys = (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK)
2540 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2541 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2542 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2543 }
2544 else
2545 {
2546 /* in list */
2547 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2548 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2549 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2550 }
2551 iPhysExt = iPhysExtNext;
2552 return;
2553 }
2554 }
2555
2556 /* next */
2557 iPhysExtPrev = iPhysExt;
2558 iPhysExt = paPhysExts[iPhysExt].iNext;
2559 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2560
2561 AssertFatalMsgFailed(("not-found! cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2562 }
2563 else /* nothing to do */
2564 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64\n", *pHCPhys));
2565}
2566
2567
2568
2569/**
2570 * Clear references to guest physical memory.
2571 *
2572 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2573 * is assumed to be correct, so the linear search can be skipped and we can assert
2574 * at an earlier point.
2575 *
2576 * @param pPool The pool.
2577 * @param pPage The page.
2578 * @param HCPhys The host physical address corresponding to the guest page.
2579 * @param GCPhys The guest physical address corresponding to HCPhys.
2580 */
2581static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2582{
2583 /*
2584 * Walk range list.
2585 */
2586 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2587 while (pRam)
2588 {
2589 RTGCPHYS off = GCPhys - pRam->GCPhys;
2590 if (off < pRam->cb)
2591 {
2592 /* does it match? */
2593 const unsigned iPage = off >> PAGE_SHIFT;
2594 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2595 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2596 {
2597 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2598 return;
2599 }
2600 break;
2601 }
2602 pRam = CTXSUFF(pRam->pNext);
2603 }
2604 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2605}
2606
2607
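/*
 * Illustrative sketch (not part of the original source): the range test
 * used when walking the RAM range list above. Computing the offset with an
 * unsigned subtraction lets a single compare reject both addresses below
 * the range (the subtraction wraps to a huge value) and addresses above it.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool exRangeContains(uint64_t GCPhys, uint64_t GCPhysStart, uint64_t cbRange,
                            uint64_t *piPage)
{
    uint64_t off = GCPhys - GCPhysStart;    /* wraps when GCPhys < GCPhysStart */
    if (off < cbRange)
    {
        *piPage = off >> 12;                /* page index within the range */
        return true;
    }
    return false;
}
#endif
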
2608/**
2609 * Clear references to guest physical memory.
2610 *
2611 * @param pPool The pool.
2612 * @param pPage The page.
2613 * @param HCPhys The host physical address corresponding to the guest page.
2614 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2615 */
2616static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2617{
2618 /*
2619 * Walk range list.
2620 */
2621 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2622 while (pRam)
2623 {
2624 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2625 if (off < pRam->cb)
2626 {
2627 /* does it match? */
2628 const unsigned iPage = off >> PAGE_SHIFT;
2629 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2630 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2631 {
2632 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2633 return;
2634 }
2635 break;
2636 }
2637 pRam = CTXSUFF(pRam->pNext);
2638 }
2639
2640 /*
2641 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2642 */
2643 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2644 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2645 while (pRam)
2646 {
2647 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2648 while (iPage-- > 0)
2649 {
2650 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2651 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2652 {
2653 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2654 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2655 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2656 return;
2657 }
2658 }
2659 pRam = CTXSUFF(pRam->pNext);
2660 }
2661
2662 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2663}
2664
2665
2666/**
2667 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2668 *
2669 * @param pPool The pool.
2670 * @param pPage The page.
2671 * @param pShwPT The shadow page table (mapping of the page).
2672 * @param pGstPT The guest page table.
2673 */
2674DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2675{
2676 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2677 if (pShwPT->a[i].n.u1Present)
2678 {
2679 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2680 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2681 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2682 if (!--pPage->cPresent)
2683 break;
2684 }
2685}
2686
2687
2688/**
2689 * Clear references to guest physical memory in a PAE / 32-bit page table.
2690 *
2691 * @param pPool The pool.
2692 * @param pPage The page.
2693 * @param pShwPT The shadow page table (mapping of the page).
2694 * @param pGstPT The guest page table (just a half one).
2695 */
2696DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2697{
2698 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2699 if (pShwPT->a[i].n.u1Present)
2700 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2701}
2702
2703
2704/**
2705 * Clear references to guest physical memory in a PAE / PAE page table.
2706 *
2707 * @param pPool The pool.
2708 * @param pPage The page.
2709 * @param pShwPT The shadow page table (mapping of the page).
2710 * @param pGstPT The guest page table.
2711 */
2712DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2713{
2714 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2715 if (pShwPT->a[i].n.u1Present)
2716 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2717}
2718
2719
2720/**
2721 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2722 *
2723 * @param pPool The pool.
2724 * @param pPage The page.
2725 * @param pShwPT The shadow page table (mapping of the page).
2727 */
2728DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2729{
2730 RTGCPHYS GCPhys = pPage->GCPhys;
2731 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2732 if (pShwPT->a[i].n.u1Present)
2733 {
2734 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2735 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2736 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2737 }
2738}
2739
2740
2741/**
2742 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2743 *
2744 * @param pPool The pool.
2745 * @param pPage The page.
2746 * @param pShwPT The shadow page table (mapping of the page).
2747 */
2748DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2749{
2750 RTGCPHYS GCPhys = pPage->GCPhys;
2751 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2752 if (pShwPT->a[i].n.u1Present)
2753 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2754}
2755#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2756
2757/**
2758 * Clear references to shadowed pages in a PAE page directory.
2759 *
2760 * @param pPool The pool.
2761 * @param pPage The page.
2762 * @param pShwPD The shadow page directory (mapping of the page).
2763 */
2764DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2765{
2766 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2767 {
2768 if (pShwPD->a[i].n.u1Present)
2769 {
2770 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2771 if (pSubPage)
2772 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2773 else
2774 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2775 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2776 }
2777 }
2778}
2779
2780
2781/**
2782 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2783 *
2784 * @param pPool The pool.
2785 * @param pPage The page.
2786 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2787 */
2788DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2789{
2790 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2791 {
2792 if (pShwPdPtr->a[i].n.u1Present)
2793 {
2794 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2795 if (pSubPage)
2796 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2797 else
2798 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2799 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2800 }
2801 }
2802}
2803
2804
2805/**
2806 * Clears all references made by this page.
2807 *
2808 * This includes other shadow pages and GC physical addresses.
2809 *
2810 * @param pPool The pool.
2811 * @param pPage The page.
2812 */
2813static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2814{
2815 /*
2816 * Map the shadow page and take action according to the page kind.
2817 */
2818 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2819 switch (pPage->enmKind)
2820 {
2821#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2822 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2823 {
2824 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2825 void *pvGst;
2826 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2827 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2828 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2829 break;
2830 }
2831
2832 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2833 {
2834 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2835 void *pvGst;
2836 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2837 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2838 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2839 break;
2840 }
2841
2842 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2843 {
2844 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2845 void *pvGst;
2846 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2847 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2848 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2849 break;
2850 }
2851
2852 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2853 {
2854 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2855 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2856 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2857 break;
2858 }
2859
2860 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2861 {
2862 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2863 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2864 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2865 break;
2866 }
2867
2868#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2869 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2870 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2871 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2872 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2873 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2874 break;
2875#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2876
2877 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2878 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2879 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2880 break;
2881
2882 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2883 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2884 break;
2885
2886 default:
2887 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2888 }
2889
2890 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
2891 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2892 ASMMemZeroPage(pvShw);
2893 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2894 pPage->fZeroed = true;
2895}
2896#endif /* PGMPOOL_WITH_USER_TRACKING */
2897
2898
2899/**
2900 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2901 *
2902 * @param pPool The pool.
2903 */
2904static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2905{
2906 /*
2907 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2908 */
2909 Assert(NIL_PGMPOOL_IDX == 0);
2910 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2911 {
2912 /*
2913 * Get the page address.
2914 */
2915 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2916 union
2917 {
2918 uint64_t *pau64;
2919 uint32_t *pau32;
2920 } u;
2921 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2922
2923 /*
2924 * Mark stuff not present.
2925 */
2926 switch (pPage->enmKind)
2927 {
2928 case PGMPOOLKIND_ROOT_32BIT_PD:
2929 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2930 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2931 u.pau32[iPage] = 0;
2932 break;
2933
2934 case PGMPOOLKIND_ROOT_PAE_PD:
2935 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2936 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2937 u.pau64[iPage] = 0;
2938 break;
2939
2940 case PGMPOOLKIND_ROOT_PML4:
2941 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2942 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2943 u.pau64[iPage] = 0;
2944 break;
2945
2946 case PGMPOOLKIND_ROOT_PDPTR:
2947 /* Not root of shadowed pages currently, ignore it. */
2948 break;
2949 }
2950 }
2951
2952 /*
2953 * Paranoia (to be removed), flag a global CR3 sync.
2954 */
2955 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
2956}
2957
2958
2959/**
2960 * Flushes the entire cache.
2961 *
2962 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
2963 * and will execute this CR3 flush.
2964 *
2965 * @param pPool The pool.
2966 */
2967static void pgmPoolFlushAllInt(PPGMPOOL pPool)
2968{
2969 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
2970 LogFlow(("pgmPoolFlushAllInt:\n"));
2971
2972 /*
2973 * If there are no pages in the pool, there is nothing to do.
2974 */
2975 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
2976 {
2977 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
2978 return;
2979 }
2980
2981 /*
2982 * Nuke the free list and reinsert all pages into it.
2983 */
2984 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
2985 {
2986 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2987
2988#ifdef IN_RING3
2989 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
2990#endif
2991#ifdef PGMPOOL_WITH_MONITORING
2992 if (pPage->fMonitored)
2993 pgmPoolMonitorFlush(pPool, pPage);
2994 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2995 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2996 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2997 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2998 pPage->cModifications = 0;
2999#endif
3000 pPage->GCPhys = NIL_RTGCPHYS;
3001 pPage->enmKind = PGMPOOLKIND_FREE;
3002 Assert(pPage->idx == i);
3003 pPage->iNext = i + 1;
3004 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3005 pPage->fSeenNonGlobal = false;
3006 pPage->fMonitored = false;
3007 pPage->fCached = false;
3008 pPage->fReusedFlushPending = false;
3009 pPage->fCR3Mix = false;
3010#ifdef PGMPOOL_WITH_USER_TRACKING
3011 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3012#endif
3013#ifdef PGMPOOL_WITH_CACHE
3014 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3015 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3016#endif
3017 }
3018 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3019 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3020 pPool->cUsedPages = 0;
3021
3022#ifdef PGMPOOL_WITH_USER_TRACKING
3023 /*
3024 * Zap and reinitialize the user records.
3025 */
3026 pPool->cPresent = 0;
3027 pPool->iUserFreeHead = 0;
3028 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3029 const unsigned cMaxUsers = pPool->cMaxUsers;
3030 for (unsigned i = 0; i < cMaxUsers; i++)
3031 {
3032 paUsers[i].iNext = i + 1;
3033 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3034 paUsers[i].iUserTable = 0xfffe;
3035 }
3036 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3037#endif
3038
3039#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3040 /*
3041 * Clear all the GCPhys links and rebuild the phys ext free list.
3042 */
3043 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3044 pRam;
3045 pRam = pRam->CTXSUFF(pNext))
3046 {
3047 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3048 while (iPage-- > 0)
3049 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
3050 }
3051
3052 pPool->iPhysExtFreeHead = 0;
3053 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3054 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3055 for (unsigned i = 0; i < cMaxPhysExts; i++)
3056 {
3057 paPhysExts[i].iNext = i + 1;
3058 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3059 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3060 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3061 }
3062 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3063#endif
3064
3065#ifdef PGMPOOL_WITH_MONITORING
3066 /*
3067 * Just zap the modified list.
3068 */
3069 pPool->cModifiedPages = 0;
3070 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3071#endif
3072
3073#ifdef PGMPOOL_WITH_CACHE
3074 /*
3075 * Clear the GCPhys hash and the age list.
3076 */
3077 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3078 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3079 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3080 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3081#endif
3082
3083 /*
3084 * Flush all the special root pages.
3085 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3086 */
3087 pgmPoolFlushAllSpecialRoots(pPool);
3088 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3089 {
3090 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3091 pPage->iNext = NIL_PGMPOOL_IDX;
3092#ifdef PGMPOOL_WITH_MONITORING
3093 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3094 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3095 pPage->cModifications = 0;
3096 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3097 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3098 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3099 if (pPage->fMonitored)
3100 {
3101 PVM pVM = pPool->CTXSUFF(pVM);
3102 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3103 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
3104 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
3105 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
3106 pPool->pszAccessHandler);
3107 AssertFatalRCSuccess(rc);
3108# ifdef PGMPOOL_WITH_CACHE
3109 pgmPoolHashInsert(pPool, pPage);
3110# endif
3111 }
3112#endif
3113#ifdef PGMPOOL_WITH_USER_TRACKING
3114 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3115#endif
3116#ifdef PGMPOOL_WITH_CACHE
3117 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3118 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3119#endif
3120 }
3121
3122 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3123}
3124
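/*
 * Illustrative sketch (not part of the original source): the one-pass
 * free-list rebuild pattern pgmPoolFlushAllInt() applies above to the page,
 * user record and phys ext arrays: chain every element to its successor and
 * terminate the last one with NIL. Assumes cNodes >= 1.
 */
#if 0
#include <stdint.h>

#define EX_NIL UINT16_MAX

typedef struct ExampleNode { uint16_t iNext; } ExampleNode;

static void exRebuildFreeList(ExampleNode *paNodes, unsigned cNodes, uint16_t *piFreeHead)
{
    for (unsigned i = 0; i < cNodes - 1; i++)
        paNodes[i].iNext = (uint16_t)(i + 1);
    paNodes[cNodes - 1].iNext = EX_NIL;
    *piFreeHead = 0;                    /* everything is free again */
}
#endif
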
3125
3126/**
3127 * Flushes a pool page.
3128 *
3129 * This moves the page to the free list after removing all user references to it.
3130 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3131 *
3132 * @returns VBox status code.
3133 * @retval VINF_SUCCESS on success.
3134 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3135 * @param pPool The pool.
3136 * @param pPage The shadow page to flush.
3137 */
3138int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3139{
3140 int rc = VINF_SUCCESS;
3141 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3142 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3143 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3144
3145 /*
3146 * Quietly reject any attempts at flushing any of the special root pages.
3147 */
3148 if (pPage->idx < PGMPOOL_IDX_FIRST)
3149 {
3150 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3151 return VINF_SUCCESS;
3152 }
3153
3154 /*
3155 * Mark the page as being in need of an ASMMemZeroPage().
3156 */
3157 pPage->fZeroed = false;
3158
3159#ifdef PGMPOOL_WITH_USER_TRACKING
3160 /*
3161 * Clear the page.
3162 */
3163 pgmPoolTrackClearPageUsers(pPool, pPage);
3164 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3165 pgmPoolTrackDeref(pPool, pPage);
3166 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3167#endif
3168
3169#ifdef PGMPOOL_WITH_CACHE
3170 /*
3171 * Flush it from the cache.
3172 */
3173 pgmPoolCacheFlushPage(pPool, pPage);
3174#endif /* PGMPOOL_WITH_CACHE */
3175
3176#ifdef PGMPOOL_WITH_MONITORING
3177 /*
3178 * Deregister the monitoring.
3179 */
3180 if (pPage->fMonitored)
3181 rc = pgmPoolMonitorFlush(pPool, pPage);
3182#endif
3183
3184 /*
3185 * Free the page.
3186 */
3187 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3188 pPage->iNext = pPool->iFreeHead;
3189 pPool->iFreeHead = pPage->idx;
3190 pPage->enmKind = PGMPOOLKIND_FREE;
3191 pPage->GCPhys = NIL_RTGCPHYS;
3192 pPage->fReusedFlushPending = false;
3193
3194 pPool->cUsedPages--;
3195 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3196 return rc;
3197}
3198
3199
3200/**
3201 * Frees a usage of a pool page.
3202 *
3203 * The caller is responsible for updating the user table so that it no longer
3204 * references the shadow page.
3205 *
3206 * @param pPool The pool.
3207 * @param pPage The shadow page.
3208 * @param iUser The shadow page pool index of the user table.
3209 * @param iUserTable The index into the user table (shadowed).
3210 */
3211void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3212{
3213 STAM_PROFILE_START(&pPool->StatFree, a);
3214 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3215 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3216 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3217#ifdef PGMPOOL_WITH_USER_TRACKING
3218 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3219#endif
3220#ifdef PGMPOOL_WITH_CACHE
3221 if (!pPage->fCached)
3222#endif
3223 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3224 STAM_PROFILE_STOP(&pPool->StatFree, a);
3225}
3226
3227
3228/**
3229 * Makes one or more free pages available.
3230 *
3231 * @returns VBox status code.
3232 * @retval VINF_SUCCESS on success.
3233 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3234 *
3235 * @param pPool The pool.
3236 * @param iUser The user of the page.
3237 */
3238static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3239{
3240 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3241
3242 /*
3243 * If the pool isn't fully grown yet, expand it.
3244 */
3245 if (pPool->cCurPages < pPool->cMaxPages)
3246 {
3247 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3248#ifdef IN_RING3
3249 int rc = PGMR3PoolGrow(pPool->pVMHC);
3250#else
3251 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3252#endif
3253 if (VBOX_FAILURE(rc))
3254 return rc;
3255 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3256 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3257 return VINF_SUCCESS;
3258 }
3259
3260#ifdef PGMPOOL_WITH_CACHE
3261 /*
3262 * Free one cached page.
3263 */
3264 return pgmPoolCacheFreeOne(pPool, iUser);
3265#else
3266 /*
3267 * Flush the pool.
3268 * If we have tracking enabled, it should be possible to come up with
3269 * a cheap replacement strategy...
3270 */
3271 pgmPoolFlushAllInt(pPool);
3272 return VERR_PGM_POOL_FLUSHED;
3273#endif
3274}
3275
3276
3277/**
3278 * Allocates a page from the pool.
3279 *
3280 * This page may actually be a cached page and not in need of any processing
3281 * on the caller's part.
3282 *
3283 * @returns VBox status code.
3284 * @retval VINF_SUCCESS if a NEW page was allocated.
3285 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3286 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3287 * @param pVM The VM handle.
3288 * @param GCPhys The GC physical address of the page we're gonna shadow.
3289 * For 4MB and 2MB PD entries, it's the first address the
3290 * shadow PT is covering.
3291 * @param enmKind The kind of mapping.
3292 * @param iUser The shadow page pool index of the user table.
3293 * @param iUserTable The index into the user table (shadowed).
3294 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3295 */
3296int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3297{
3298 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3299 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3300 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3301
3302 *ppPage = NULL;
3303
3304#ifdef PGMPOOL_WITH_CACHE
3305 if (pPool->fCacheEnabled)
3306 {
3307 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3308 if (VBOX_SUCCESS(rc2))
3309 {
3310 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3311 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3312 return rc2;
3313 }
3314 }
3315#endif
3316
3317 /*
3318 * Allocate a new one.
3319 */
3320 int rc = VINF_SUCCESS;
3321 uint16_t iNew = pPool->iFreeHead;
3322 if (iNew == NIL_PGMPOOL_IDX)
3323 {
3324 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3325 if (VBOX_FAILURE(rc))
3326 {
3327 if (rc != VERR_PGM_POOL_CLEARED)
3328 {
3329 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3330 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3331 return rc;
3332 }
3333 rc = VERR_PGM_POOL_FLUSHED;
3334 }
3335 iNew = pPool->iFreeHead;
3336 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3337 }
3338
3339 /* unlink the free head */
3340 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3341 pPool->iFreeHead = pPage->iNext;
3342 pPage->iNext = NIL_PGMPOOL_IDX;
3343
3344 /*
3345 * Initialize it.
3346 */
3347 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3348 pPage->enmKind = enmKind;
3349 pPage->GCPhys = GCPhys;
3350 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3351 pPage->fMonitored = false;
3352 pPage->fCached = false;
3353 pPage->fReusedFlushPending = false;
3354 pPage->fCR3Mix = false;
3355#ifdef PGMPOOL_WITH_MONITORING
3356 pPage->cModifications = 0;
3357 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3358 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3359#endif
3360#ifdef PGMPOOL_WITH_USER_TRACKING
3361 pPage->cPresent = 0;
3362 pPage->iFirstPresent = ~0;
3363
3364 /*
3365 * Insert into the tracking and cache. If this fails, free the page.
3366 */
3367 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3368 if (VBOX_FAILURE(rc3))
3369 {
3370 if (rc3 != VERR_PGM_POOL_CLEARED)
3371 {
3372 pPool->cUsedPages--;
3373 pPage->enmKind = PGMPOOLKIND_FREE;
3374 pPage->GCPhys = NIL_RTGCPHYS;
3375 pPage->iNext = pPool->iFreeHead;
3376 pPool->iFreeHead = pPage->idx;
3377 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3378 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3379 return rc3;
3380 }
3381 rc = VERR_PGM_POOL_FLUSHED;
3382 }
3383#endif /* PGMPOOL_WITH_USER_TRACKING */
3384
3385 /*
3386 * Commit the allocation, clear the page and return.
3387 */
3388#ifdef VBOX_WITH_STATISTICS
3389 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3390 pPool->cUsedPagesHigh = pPool->cUsedPages;
3391#endif
3392
3393 if (!pPage->fZeroed)
3394 {
3395 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3396 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3397 ASMMemZeroPage(pv);
3398 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3399 }
3400
3401 *ppPage = pPage;
3402 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3403 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3404 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3405 return rc;
3406}
3407
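/*
 * Illustrative sketch (not part of the original source): how a caller can
 * consume the status codes documented for pgmPoolAlloc() above. The page
 * kind and the surrounding logic are assumptions for the example.
 */
#if 0
static int exShadowOnePT(PVM pVM, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
{
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iUser, iUserTable, &pShwPage);
    if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* Cache hit: the shadow entries are already valid, just link the page. */
    }
    else if (VBOX_SUCCESS(rc))
    {
        /* Fresh, zeroed page: the caller fills in the shadow entries. */
    }
    else
    {
        /* E.g. VERR_PGM_POOL_FLUSHED: a CR3 sync has been flagged, bail out. */
    }
    return rc;
}
#endif
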
3408
3409/**
3410 * Frees a usage of a pool page.
3411 *
3412 * @param pVM The VM handle.
3413 * @param HCPhys The HC physical address of the shadow page.
3414 * @param iUser The shadow page pool index of the user table.
3415 * @param iUserTable The index into the user table (shadowed).
3416 */
3417void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3418{
3419 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3420 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3421 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3422}
3423
3424
3425/**
3426 * Gets an in-use page in the pool by its physical address.
3427 *
3428 * @returns Pointer to the page.
3429 * @param pVM The VM handle.
3430 * @param HCPhys The HC physical address of the shadow page.
3431 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3432 */
3433PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3434{
3435 /** @todo profile this! */
3436 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3437 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3438 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3439 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3440 return pPage;
3441}
3442
3443
3444/**
3445 * Flushes the entire cache.
3446 *
3447 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3448 * and will execute this CR3 flush.
3449 *
3450 * @param pVM The VM handle.
3451 */
3452void pgmPoolFlushAll(PVM pVM)
3453{
3454 LogFlow(("pgmPoolFlushAll:\n"));
3455 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3456}
3457