VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 2096

Last change on this file since 2096 was 2091, checked in by vboxsync, 18 years ago

space

  • Property svn:keywords set to Id
File size: 118.7 KB
Line 
1/* $Id: PGMAllPool.cpp 2091 2007-04-14 15:24:38Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 return pVM->pgm.s.apGCPaePDs[0];
115 case PGMPOOL_IDX_PDPTR:
116 return pVM->pgm.s.pGCPaePDPTR;
117 case PGMPOOL_IDX_PML4:
118 return pVM->pgm.s.pGCPaePML4;
119 default:
120 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
121 return NULL;
122 }
123}
124#endif /* IN_GC */
125
126
127#ifdef PGMPOOL_WITH_MONITORING
128/**
129 * Determin the size of a write instruction.
130 * @returns number of bytes written.
131 * @param pDis The disassembler state.
132 */
133static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
134{
135 /*
136 * This is very crude and possibly wrong for some opcodes,
137 * but since it's not really supposed to be called we can
138 * probably live with that.
139 */
140 return DISGetParamSize(pDis, &pDis->param1);
141}
142
143
144/**
145 * Flushes a chain of pages sharing the same access monitor.
146 *
147 * @returns VBox status code suitable for scheduling.
148 * @param pPool The pool.
149 * @param pPage A page in the chain.
150 */
151int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
152{
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Itereate the list flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
179 rc = VINF_PGM_SYNC_CR3;
180 }
181 /* next */
182 if (idx == NIL_PGMPOOL_IDX)
183 break;
184 pPage = &pPool->aPages[idx];
185 }
186 return rc;
187}
188
189
190/**
191 * Wrapper for getting the current context pointer to the entry begin modified.
192 *
193 * @returns Pointer to the current context mapping of the entry.
194 * @param pPool The pool.
195 * @param pvFault The fault virtual address.
196 * @param GCPhysFault The fault physical address.
197 * @param cbEntry The entry size.
198 */
199#ifdef IN_RING3
200DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
201#else
202DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
203#endif
204{
205#ifdef IN_GC
206 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
207
208#elif defined(IN_RING0)
209 void *pvRet;
210 int rc = PGMRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
211 AssertFatalRCSuccess(rc);
212 return pvRet;
213
214#elif defined(IN_RING3)
215 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
216#else
217# error "huh?"
218#endif
219}
220
221
222/**
223 * Process shadow entries before they are changed by the guest.
224 *
225 * For PT entries we will clear them. For PD entries, we'll simply check
226 * for mapping conflicts and set the SyncCR3 FF if found.
227 *
228 * @param pPool The pool.
229 * @param pPage The head page.
230 * @param GCPhysFault The guest physical fault address.
231 * @param uAddress In R0 and GC this is the guest context fault address (flat).
232 * In R3 this is the host context 'fault' address.
233 * @param pCpu The disassembler state for figuring out the write size.
234 * This need not be specified if the caller knows we won't do cross entry accesses.
235 */
236#ifdef IN_RING3
237void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
238#else
239void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
240#endif
241{
242 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
243 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
244 for (;;)
245 {
246 union
247 {
248 void *pv;
249 PX86PT pPT;
250 PX86PTPAE pPTPae;
251 PX86PD pPD;
252 PX86PDPAE pPDPae;
253 } uShw;
254 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
255
256 switch (pPage->enmKind)
257 {
258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
259 {
260 const unsigned iShw = off / sizeof(X86PTE);
261 if (uShw.pPT->a[iShw].n.u1Present)
262 {
263# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
264 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
265 pgmPoolTracDerefGCPhysHint(pPool, pPage,
266 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
267 pGstPte->u & X86_PTE_PG_MASK);
268# endif
269 uShw.pPT->a[iShw].u = 0;
270 }
271 break;
272 }
273
274 /* page/2 sized */
275 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 if (uShw.pPTPae->a[iShw].n.u1Present)
280 {
281# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
282 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 pGstPte->u & X86_PTE_PG_MASK);
286# endif
287 uShw.pPTPae->a[iShw].u = 0;
288 }
289 }
290 break;
291
292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
293 {
294 const unsigned iShw = off / sizeof(X86PTPAE);
295 if (uShw.pPTPae->a[iShw].n.u1Present)
296 {
297# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
298 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
299 pgmPoolTracDerefGCPhysHint(pPool, pPage,
300 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
301 pGstPte->u & X86_PTE_PAE_PG_MASK);
302# endif
303 uShw.pPTPae->a[iShw].u = 0;
304 }
305 break;
306 }
307
308 case PGMPOOLKIND_ROOT_32BIT_PD:
309 {
310 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
311 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
312 {
313 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
314 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
316 }
317 /* paranoia / a bit assumptive. */
318 else if ( pCpu
319 && (off & 4)
320 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
321 {
322 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
323 if ( iShw2 != iShw
324 && iShw2 < ELEMENTS(uShw.pPD->a)
325 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
326 {
327 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
328 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
329 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
330 }
331 }
332#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
333 if ( uShw.pPD->a[iShw].n.u1Present
334 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
335 {
336 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
337# ifdef IN_GC /* TLB load - we're pushing things a bit... */
338 ASMProbeReadByte(pvAddress);
339# endif
340 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
341 uShw.pPD->a[iShw].u = 0;
342 }
343#endif
344 break;
345 }
346
347 case PGMPOOLKIND_ROOT_PAE_PD:
348 {
349 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
350 for (unsigned i = 0; i < 2; i++, iShw++)
351 {
352 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
353 {
354 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
355 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
356 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
357 }
358 /* paranoia / a bit assumptive. */
359 else if ( pCpu
360 && (off & 4)
361 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
362 {
363 const unsigned iShw2 = iShw + 2;
364 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
365 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
366 {
367 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
368 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
369 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
370 }
371 }
372#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
373 if ( uShw.pPDPae->a[iShw].n.u1Present
374 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
375 {
376 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
377# ifdef IN_GC /* TLB load - we're pushing things a bit... */
378 ASMProbeReadByte(pvAddress);
379# endif
380 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
381 uShw.pPDPae->a[iShw].u = 0;
382 }
383#endif
384 }
385 break;
386 }
387
388 default:
389 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
390 }
391
392 /* next */
393 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
394 return;
395 pPage = &pPool->aPages[pPage->iMonitoredNext];
396 }
397}
398
399
400# ifndef IN_RING3
401/**
402 * Checks if a access could be a fork operation in progress.
403 *
404 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
405 *
406 * @returns true if it's likly that we're forking, otherwise false.
407 * @param pPool The pool.
408 * @param pCpu The disassembled instruction.
409 * @param offFault The access offset.
410 */
411DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
412{
413 /*
414 * i386 linux is using btr to clear X86_PTE_RW.
415 * The functions involved are (2.6.16 source inspection):
416 * clear_bit
417 * ptep_set_wrprotect
418 * copy_one_pte
419 * copy_pte_range
420 * copy_pmd_range
421 * copy_pud_range
422 * copy_page_range
423 * dup_mmap
424 * dup_mm
425 * copy_mm
426 * copy_process
427 * do_fork
428 */
429 if ( pCpu->pCurInstr->opcode == OP_BTR
430 && !(offFault & 4)
431 /** @todo Validate that the bit index is X86_PTE_RW. */
432 )
433 {
434 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
435 return true;
436 }
437 return false;
438}
439
440
441/**
442 * Determin whether the page is likely to have been reused.
443 *
444 * @returns true if we consider the page as being reused for a different purpose.
445 * @returns false if we consider it to still be a paging page.
446 * @param pPage The page in question.
447 * @param pCpu The disassembly info for the faulting insturction.
448 * @param pvFault The fault address.
449 *
450 * @remark The REP prefix check is left to the caller because of STOSD/W.
451 */
452DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
453{
454 switch (pCpu->pCurInstr->opcode)
455 {
456 case OP_PUSH:
457 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
458 return true;
459 case OP_PUSHF:
460 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
461 return true;
462 case OP_PUSHA:
463 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
464 return true;
465 case OP_FXSAVE:
466 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
467 return true;
468 }
469 if ( (pCpu->param1.flags & USE_REG_GEN32)
470 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
471 {
472 Log4(("pgmPoolMonitorIsReused: ESP\n"));
473 return true;
474 }
475
476 //if (pPage->fCR3Mix)
477 // return false;
478 return false;
479}
480
481
482/**
483 * Flushes the page being accessed.
484 *
485 * @returns VBox status code suitable for scheduling.
486 * @param pVM The VM handle.
487 * @param pPool The pool.
488 * @param pPage The pool page (head).
489 * @param pCpu The disassembly of the write instruction.
490 * @param pRegFrame The trap register frame.
491 * @param GCPhysFault The fault address as guest physical address.
492 * @param pvFault The fault address.
493 */
494static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
495 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
496{
497 /*
498 * First, do the flushing.
499 */
500 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
501
502 /*
503 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
504 */
505 uint32_t cbWritten;
506 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
507 if (VBOX_SUCCESS(rc2))
508 pRegFrame->eip += pCpu->opsize;
509 else if (rc2 == VERR_EM_INTERPRETER)
510 {
511#ifdef IN_GC
512 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
513 {
514 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04:%RGv, ignoring.\n",
515 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
516 rc = VINF_SUCCESS;
517 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
518 }
519 else
520#endif
521 {
522 rc = VINF_EM_RAW_EMULATE_INSTR;
523 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
524 }
525 }
526 else
527 rc = rc2;
528
529 /* See use in pgmPoolAccessHandlerSimple(). */
530 PGM_INVL_GUEST_TLBS();
531
532 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
533 return rc;
534
535}
536
537
538/**
539 * Handles the STOSD write accesses.
540 *
541 * @returns VBox status code suitable for scheduling.
542 * @param pVM The VM handle.
543 * @param pPool The pool.
544 * @param pPage The pool page (head).
545 * @param pCpu The disassembly of the write instruction.
546 * @param pRegFrame The trap register frame.
547 * @param GCPhysFault The fault address as guest physical address.
548 * @param pvFault The fault address.
549 */
550DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
551 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
552{
553 /*
554 * Increment the modification counter and insert it into the list
555 * of modified pages the first time.
556 */
557 if (!pPage->cModifications++)
558 pgmPoolMonitorModifiedInsert(pPool, pPage);
559
560 /*
561 * Execute REP STOSD.
562 *
563 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
564 * write situation, meaning that it's safe to write here.
565 */
566#ifdef IN_GC
567 uint32_t *pu32 = (uint32_t *)pvFault;
568#else
569 RTGCPTR pu32 = pvFault;
570#endif
571 while (pRegFrame->ecx)
572 {
573 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
574#ifdef IN_GC
575 *pu32++ = pRegFrame->eax;
576#else
577 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
578 pu32 += 4;
579#endif
580 GCPhysFault += 4;
581 pRegFrame->edi += 4;
582 pRegFrame->ecx--;
583 }
584 pRegFrame->eip += pCpu->opsize;
585
586 /* See use in pgmPoolAccessHandlerSimple(). */
587 PGM_INVL_GUEST_TLBS();
588
589 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
590 return VINF_SUCCESS;
591}
592
593
594/**
595 * Handles the simple write accesses.
596 *
597 * @returns VBox status code suitable for scheduling.
598 * @param pVM The VM handle.
599 * @param pPool The pool.
600 * @param pPage The pool page (head).
601 * @param pCpu The disassembly of the write instruction.
602 * @param pRegFrame The trap register frame.
603 * @param GCPhysFault The fault address as guest physical address.
604 * @param pvFault The fault address.
605 */
606DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
607 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
608{
609 /*
610 * Increment the modification counter and insert it into the list
611 * of modified pages the first time.
612 */
613 if (!pPage->cModifications++)
614 pgmPoolMonitorModifiedInsert(pPool, pPage);
615
616 /*
617 * Clear all the pages. ASSUMES that pvFault is readable.
618 */
619 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
620
621 /*
622 * Interpret the instruction.
623 */
624 uint32_t cb;
625 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
626 if (VBOX_SUCCESS(rc))
627 pRegFrame->eip += pCpu->opsize;
628 else if (rc == VERR_EM_INTERPRETER)
629 {
630# ifdef IN_GC
631 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
632 {
633 /* We're not able to handle this in ring-3, so fix the interpreter! */
634 /** @note Should be fine. There's no need to flush the whole thing. */
635#ifndef DEBUG_sandervl
636 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
637 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
638#endif
639 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
640 rc = pgmPoolMonitorChainFlush(pPool, pPage);
641 }
642 else
643# endif
644 {
645 rc = VINF_EM_RAW_EMULATE_INSTR;
646 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
647 }
648 }
649
650 /*
651 * Quick hack, with logging enabled we're getting stale
652 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
653 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
654 * have to be fixed to support this. But that'll have to wait till next week.
655 *
656 * An alternative is to keep track of the changed PTEs together with the
657 * GCPhys from the guest PT. This may proove expensive though.
658 *
659 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
660 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
661 */
662 PGM_INVL_GUEST_TLBS();
663
664 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
665 return rc;
666}
667
668
669/**
670 * \#PF Handler callback for PT write accesses.
671 *
672 * @returns VBox status code (appropriate for GC return).
673 * @param pVM VM Handle.
674 * @param uErrorCode CPU Error code.
675 * @param pRegFrame Trap register frame.
676 * NULL on DMA and other non CPU access.
677 * @param pvFault The fault address (cr2).
678 * @param GCPhysFault The GC physical address corresponding to pvFault.
679 * @param pvUser User argument.
680 */
681DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
682{
683 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
684 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
685 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
686 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
687
688 /*
689 * We should ALWAYS have the list head as user parameter. This
690 * is because we use that page to record the changes.
691 */
692 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
693
694 /*
695 * Disassemble the faulting instruction.
696 */
697 DISCPUSTATE Cpu;
698 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
699 AssertRCReturn(rc, rc);
700
701 /*
702 * Check if it's worth dealing with.
703 */
704 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
705 || pPage->fCR3Mix)
706 && !pgmPoolMonitorIsReused(pPage, &Cpu,pvFault)
707 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
708 {
709 /*
710 * Simple instructions, no REP prefix.
711 */
712 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
713 {
714 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
715 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
716 return rc;
717 }
718
719 /*
720 * Windows is frequently doing small memset() operations (netio test 4k+).
721 * We have to deal with these or we'll kill the cache and performance.
722 */
723 if ( Cpu.pCurInstr->opcode == OP_STOSWD
724 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
725 && pRegFrame->ecx <= 0x20
726 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
727 && !((uintptr_t)pvFault & 3)
728 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
729 && Cpu.mode == CPUMODE_32BIT
730 && Cpu.opmode == CPUMODE_32BIT
731 && Cpu.addrmode == CPUMODE_32BIT
732 && Cpu.prefix == PREFIX_REP
733 && !pRegFrame->eflags.Bits.u1DF
734 )
735 {
736 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
737 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
738 return rc;
739 }
740
741 /* REP prefix, don't bother. */
742 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
743 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
744 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
745 }
746
747 /*
748 * Not worth it, so flush it.
749 */
750 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
751 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
752 return rc;
753}
754
755# endif /* !IN_RING3 */
756#endif /* PGMPOOL_WITH_MONITORING */
757
758
759
760#ifdef PGMPOOL_WITH_CACHE
761/**
762 * Inserts a page into the GCPhys hash table.
763 *
764 * @param pPool The pool.
765 * @param pPage The page.
766 */
767DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
768{
769 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
770 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
771 pPage->iNext = pPool->aiHash[iHash];
772 pPool->aiHash[iHash] = pPage->idx;
773}
774
775
776/**
777 * Removes a page from the GCPhys hash table.
778 *
779 * @param pPool The pool.
780 * @param pPage The page.
781 */
782DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
783{
784 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
785 if (pPool->aiHash[iHash] == pPage->idx)
786 pPool->aiHash[iHash] = pPage->iNext;
787 else
788 {
789 uint16_t iPrev = pPool->aiHash[iHash];
790 for (;;)
791 {
792 const int16_t i = pPool->aPages[iPrev].iNext;
793 if (i == pPage->idx)
794 {
795 pPool->aPages[iPrev].iNext = pPage->iNext;
796 break;
797 }
798 if (i == NIL_PGMPOOL_IDX)
799 {
800 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
801 break;
802 }
803 iPrev = i;
804 }
805 }
806 pPage->iNext = NIL_PGMPOOL_IDX;
807}
808
809
810/**
811 * Frees up one cache page.
812 *
813 * @returns VBox status code.
814 * @retval VINF_SUCCESS on success.
815 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
816 * @param pPool The pool.
817 * @param iUser The user index.
818 */
819static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
820{
821 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
822 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
823
824 /*
825 * Select one page from the tail of the age list.
826 */
827 uint16_t iToFree = pPool->iAgeTail;
828 if (iToFree == iUser)
829 iToFree = pPool->aPages[iToFree].iAgePrev;
830/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
831 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
832 {
833 uint16_t i = pPool->aPages[iToFree].iAgePrev;
834 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
835 {
836 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
837 continue;
838 iToFree = i;
839 break;
840 }
841 }
842*/
843 Assert(iToFree != iUser);
844 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
845
846 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
847 if (rc == VINF_SUCCESS)
848 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
849 return rc;
850}
851
852
853/**
854 * Checks if a kind mismatch is really a page being reused
855 * or if it's just normal remappings.
856 *
857 * @returns true if reused and the cached page (enmKind1) should be flushed
858 * @returns false if not reused.
859 * @param enmKind1 The kind of the cached page.
860 * @param enmKind2 The kind of the requested page.
861 */
862static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
863{
864 switch (enmKind1)
865 {
866 /*
867 * It's prefectly fine to reuse these..
868 */
869 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
870 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
871 return true;
872
873 /*
874 * It's prefectly fine to reuse these, except for PAE stuff.
875 */
876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
877 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
879 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
880 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
881 switch (enmKind2)
882 {
883 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
884 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
885 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
886 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
887 return true;
888 default:
889 return false;
890 }
891
892 /*
893 * It's prefectly fine to reuse these, except for non-PAE stuff.
894 */
895 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
896 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
897 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
898 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
899 switch (enmKind2)
900 {
901 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
902 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
903 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
904 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
905 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
906 return true;
907 default:
908 return false;
909 }
910
911 /*
912 * These cannot be flushed, and it's common to reuse the PDs as PTs.
913 */
914 case PGMPOOLKIND_ROOT_32BIT_PD:
915 case PGMPOOLKIND_ROOT_PAE_PD:
916 case PGMPOOLKIND_ROOT_PDPTR:
917 case PGMPOOLKIND_ROOT_PML4:
918 return false;
919
920 default:
921 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
922 }
923}
924
925
926/**
927 * Attempts to satisfy a pgmPoolAlloc request from the cache.
928 *
929 * @returns VBox status code.
930 * @retval VINF_PGM_CACHED_PAGE on success.
931 * @retval VERR_FILE_NOT_FOUND if not found.
932 * @param pPool The pool.
933 * @param GCPhys The GC physical address of the page we're gonna shadow.
934 * @param enmKind The kind of mapping.
935 * @param iUser The shadow page pool index of the user table.
936 * @param iUserTable The index into the user table (shadowed).
937 * @param ppPage Where to store the pointer to the page.
938 */
939static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
940{
941 /*
942 * Look up the GCPhys in the hash.
943 */
944 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
945 if (i != NIL_PGMPOOL_IDX)
946 {
947 do
948 {
949 PPGMPOOLPAGE pPage = &pPool->aPages[i];
950 if (pPage->GCPhys == GCPhys)
951 {
952 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
953 {
954 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
955 if (VBOX_SUCCESS(rc))
956 {
957 *ppPage = pPage;
958 STAM_COUNTER_INC(&pPool->StatCacheHits);
959 return VINF_PGM_CACHED_PAGE;
960 }
961 return rc;
962 }
963
964 /*
965 * The kind is different. In some cases we should now flush the page
966 * as it has been reused, but in most cases this is normal remapping
967 * of PDs as PT or big pages using the GCPhys field in a sligly
968 * different way than the other kinds.
969 */
970 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
971 {
972 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
973 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
974 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
975 break;
976 }
977 }
978
979 /* next */
980 i = pPage->iNext;
981 } while (i != NIL_PGMPOOL_IDX);
982 }
983
984 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
985 STAM_COUNTER_INC(&pPool->StatCacheMisses);
986 return VERR_FILE_NOT_FOUND;
987}
988
989
990/**
991 * Inserts a page into the cache.
992 *
993 * @param pPool The pool.
994 * @param pPage The cached page.
995 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
996 */
997static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
998{
999 /*
1000 * Insert into the GCPhys hash if the page is fit for that.
1001 */
1002 Assert(!pPage->fCached);
1003 if (fCanBeCached)
1004 {
1005 pPage->fCached = true;
1006 pgmPoolHashInsert(pPool, pPage);
1007 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1008 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1009 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1010 }
1011 else
1012 {
1013 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1014 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1015 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1016 }
1017
1018 /*
1019 * Insert at the head of the age list.
1020 */
1021 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1022 pPage->iAgeNext = pPool->iAgeHead;
1023 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1024 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1025 else
1026 pPool->iAgeTail = pPage->idx;
1027 pPool->iAgeHead = pPage->idx;
1028}
1029
1030
1031/**
1032 * Flushes a cached page.
1033 *
1034 * @param pPool The pool.
1035 * @param pPage The cached page.
1036 */
1037static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1038{
1039 /*
1040 * Remove the page from the hash.
1041 */
1042 if (pPage->fCached)
1043 {
1044 pPage->fCached = false;
1045 pgmPoolHashRemove(pPool, pPage);
1046 }
1047 else
1048 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1049
1050 /*
1051 * Remove it from the age list.
1052 */
1053 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1054 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1055 else
1056 pPool->iAgeTail = pPage->iAgePrev;
1057 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1058 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1059 else
1060 pPool->iAgeHead = pPage->iAgeNext;
1061 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1062 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1063}
1064#endif /* PGMPOOL_WITH_CACHE */
1065
1066
1067#ifdef PGMPOOL_WITH_MONITORING
1068/**
1069 * Looks for pages sharing the monitor.
1070 *
1071 * @returns Pointer to the head page.
1072 * @returns NULL if not found.
1073 * @param pPool The Pool
1074 * @param pNewPage The page which is going to be monitored.
1075 */
1076static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1077{
1078#ifdef PGMPOOL_WITH_CACHE
1079 /*
1080 * Look up the GCPhys in the hash.
1081 */
1082 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1083 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1084 if (i == NIL_PGMPOOL_IDX)
1085 return NULL;
1086 do
1087 {
1088 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1089 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1090 && pPage != pNewPage)
1091 {
1092 switch (pPage->enmKind)
1093 {
1094 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1095 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1096 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1097 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1098 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1099 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1100 case PGMPOOLKIND_ROOT_32BIT_PD:
1101 case PGMPOOLKIND_ROOT_PAE_PD:
1102 case PGMPOOLKIND_ROOT_PDPTR:
1103 case PGMPOOLKIND_ROOT_PML4:
1104 {
1105 /* find the head */
1106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1107 {
1108 Assert(pPage->iMonitoredPrev != pPage->idx);
1109 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1110 }
1111 return pPage;
1112 }
1113
1114 /* ignore, no monitoring. */
1115 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1116 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1117 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1118 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1119 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1120 break;
1121 default:
1122 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1123 }
1124 }
1125
1126 /* next */
1127 i = pPage->iNext;
1128 } while (i != NIL_PGMPOOL_IDX);
1129#endif
1130 return NULL;
1131}
1132
1133/**
1134 * Enabled write monitoring of a guest page.
1135 *
1136 * @returns VBox status code.
1137 * @retval VINF_SUCCESS on success.
1138 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1139 * @param pPool The pool.
1140 * @param pPage The cached page.
1141 */
1142static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1143{
1144 /*
1145 * Filter out the relevant kinds.
1146 */
1147 switch (pPage->enmKind)
1148 {
1149 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1150 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1151 break;
1152
1153 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1154 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1155 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1156 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1157 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1158 /* Nothing to monitor here. */
1159 return VINF_SUCCESS;
1160
1161 case PGMPOOLKIND_ROOT_32BIT_PD:
1162 case PGMPOOLKIND_ROOT_PAE_PD:
1163#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1164 break;
1165#endif
1166 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1167 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1168 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1169 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1170 case PGMPOOLKIND_ROOT_PDPTR:
1171 case PGMPOOLKIND_ROOT_PML4:
1172 default:
1173 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1174 }
1175
1176 /*
1177 * Install handler.
1178 */
1179 int rc;
1180 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1181 if (pPageHead)
1182 {
1183 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1184 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1185 pPage->iMonitoredPrev = pPageHead->idx;
1186 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1187 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1188 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1189 pPageHead->iMonitoredNext = pPage->idx;
1190 rc = VINF_SUCCESS;
1191 }
1192 else
1193 {
1194 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1195 PVM pVM = pPool->CTXSUFF(pVM);
1196 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1197 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1198 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1199 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
1200 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
1201 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
1202 pPool->pszAccessHandler);
1203 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1204 * the heap size should suffice. */
1205 AssertFatalRC(rc);
1206 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1207 rc = VERR_PGM_POOL_CLEARED;
1208 }
1209 pPage->fMonitored = true;
1210 return rc;
1211}
1212
1213
1214/**
1215 * Disables write monitoring of a guest page.
1216 *
1217 * @returns VBox status code.
1218 * @retval VINF_SUCCESS on success.
1219 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1220 * @param pPool The pool.
1221 * @param pPage The cached page.
1222 */
1223static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1224{
1225 /*
1226 * Filter out the relevant kinds.
1227 */
1228 switch (pPage->enmKind)
1229 {
1230 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1231 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1232 break;
1233
1234 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1235 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1236 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1237 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1238 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1239 /* Nothing to monitor here. */
1240 return VINF_SUCCESS;
1241
1242 case PGMPOOLKIND_ROOT_32BIT_PD:
1243 case PGMPOOLKIND_ROOT_PAE_PD:
1244#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1245 break;
1246#endif
1247 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1248 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1249 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1250 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1251 case PGMPOOLKIND_ROOT_PDPTR:
1252 case PGMPOOLKIND_ROOT_PML4:
1253 default:
1254 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1255 }
1256
1257 /*
1258 * Remove the page from the monitored list or uninstall it if last.
1259 */
1260 const PVM pVM = pPool->CTXSUFF(pVM);
1261 int rc;
1262 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1263 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1264 {
1265 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1266 {
1267 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1268 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1269 pNewHead->fCR3Mix = pPage->fCR3Mix;
1270 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1271 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1272 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1273 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pNewHead),
1274 pPool->pszAccessHandler);
1275 AssertFatalRCSuccess(rc);
1276 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1277 }
1278 else
1279 {
1280 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1281 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1282 {
1283 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1284 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1285 }
1286 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1287 rc = VINF_SUCCESS;
1288 }
1289 }
1290 else
1291 {
1292 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1293 AssertFatalRC(rc);
1294 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1295 rc = VERR_PGM_POOL_CLEARED;
1296 }
1297 pPage->fMonitored = false;
1298
1299 /*
1300 * Remove it from the list of modified pages (if in it).
1301 */
1302 pgmPoolMonitorModifiedRemove(pPool, pPage);
1303
1304 return rc;
1305}
1306
1307
1308#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1309/**
1310 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1311 *
1312 * @param pPool The Pool.
1313 * @param pPage A page in the chain.
1314 * @param fCR3Mix The new fCR3Mix value.
1315 */
1316static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1317{
1318 /* current */
1319 pPage->fCR3Mix = fCR3Mix;
1320
1321 /* before */
1322 int16_t idx = pPage->iMonitoredPrev;
1323 while (idx != NIL_PGMPOOL_IDX)
1324 {
1325 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1326 idx = pPool->aPages[idx].iMonitoredPrev;
1327 }
1328
1329 /* after */
1330 idx = pPage->iMonitoredNext;
1331 while (idx != NIL_PGMPOOL_IDX)
1332 {
1333 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1334 idx = pPool->aPages[idx].iMonitoredNext;
1335 }
1336}
1337
1338
1339/**
1340 * Installs or modifies monitoring of a CR3 page (special).
1341 *
1342 * We're pretending the CR3 page is shadowed by the pool so we can use the
1343 * generic mechanisms in detecting chained monitoring. (This also gives us a
1344 * tast of what code changes are required to really pool CR3 shadow pages.)
1345 *
1346 * @returns VBox status code.
1347 * @param pPool The pool.
1348 * @param idxRoot The CR3 (root) page index.
1349 * @param GCPhysCR3 The (new) CR3 value.
1350 */
1351int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1352{
1353 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1354 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1355 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1356 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1357
1358 /*
1359 * The unlikely case where it already matches.
1360 */
1361 if (pPage->GCPhys == GCPhysCR3)
1362 {
1363 Assert(pPage->fMonitored);
1364 return VINF_SUCCESS;
1365 }
1366
1367 /*
1368 * Flush the current monitoring and remove it from the hash.
1369 */
1370 int rc = VINF_SUCCESS;
1371 if (pPage->fMonitored)
1372 {
1373 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1374 rc = pgmPoolMonitorFlush(pPool, pPage);
1375 if (rc == VERR_PGM_POOL_CLEARED)
1376 rc = VINF_SUCCESS;
1377 else
1378 AssertFatalRC(rc);
1379 pgmPoolHashRemove(pPool, pPage);
1380 }
1381
1382 /*
1383 * Monitor the page at the new location and insert it into the hash.
1384 */
1385 pPage->GCPhys = GCPhysCR3;
1386 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1387 if (rc2 != VERR_PGM_POOL_CLEARED)
1388 {
1389 AssertFatalRC(rc2);
1390 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1391 rc = rc2;
1392 }
1393 pgmPoolHashInsert(pPool, pPage);
1394 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1395 return rc;
1396}
1397
1398
1399/**
1400 * Removes the monitoring of a CR3 page (special).
1401 *
1402 * @returns VBox status code.
1403 * @param pPool The pool.
1404 * @param idxRoot The CR3 (root) page index.
1405 */
1406int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1407{
1408 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1409 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1410 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1411 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1412
1413 if (!pPage->fMonitored)
1414 return VINF_SUCCESS;
1415
1416 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1417 int rc = pgmPoolMonitorFlush(pPool, pPage);
1418 if (rc != VERR_PGM_POOL_CLEARED)
1419 AssertFatalRC(rc);
1420 else
1421 rc = VINF_SUCCESS;
1422 pgmPoolHashRemove(pPool, pPage);
1423 Assert(!pPage->fMonitored);
1424 pPage->GCPhys = NIL_RTGCPHYS;
1425 return rc;
1426}
1427#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1428
1429
1430/**
1431 * Inserts the page into the list of modified pages.
1432 *
1433 * @param pPool The pool.
1434 * @param pPage The page.
1435 */
1436void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1437{
1438 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1439 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1440 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1441 && pPool->iModifiedHead != pPage->idx,
1442 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1443 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1444 pPool->iModifiedHead, pPool->cModifiedPages));
1445
1446 pPage->iModifiedNext = pPool->iModifiedHead;
1447 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1448 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1449 pPool->iModifiedHead = pPage->idx;
1450 pPool->cModifiedPages++;
1451#ifdef VBOX_WITH_STATISTICS
1452 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1453 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1454#endif
1455}
1456
1457
1458/**
1459 * Removes the page from the list of modified pages and resets the
1460 * moficiation counter.
1461 *
1462 * @param pPool The pool.
1463 * @param pPage The page which is believed to be in the list of modified pages.
1464 */
1465static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1466{
1467 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1468 if (pPool->iModifiedHead == pPage->idx)
1469 {
1470 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1471 pPool->iModifiedHead = pPage->iModifiedNext;
1472 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1473 {
1474 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1475 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1476 }
1477 pPool->cModifiedPages--;
1478 }
1479 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1480 {
1481 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1482 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1483 {
1484 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1485 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1486 }
1487 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1488 pPool->cModifiedPages--;
1489 }
1490 else
1491 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1492 pPage->cModifications = 0;
1493}
1494
1495
1496/**
1497 * Zaps the list of modified pages, resetting their modification counters in the process.
1498 *
1499 * @param pVM The VM handle.
1500 */
1501void pgmPoolMonitorModifiedClearAll(PVM pVM)
1502{
1503 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1504 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1505
1506 unsigned cPages = 0; NOREF(cPages);
1507 uint16_t idx = pPool->iModifiedHead;
1508 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1509 while (idx != NIL_PGMPOOL_IDX)
1510 {
1511 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1512 idx = pPage->iModifiedNext;
1513 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1514 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1515 pPage->cModifications = 0;
1516 Assert(++cPages);
1517 }
1518 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1519 pPool->cModifiedPages = 0;
1520}
1521
1522
1523/**
1524 * Clear all shadow pages and clear all modification counters.
1525 *
1526 * @param pVM The VM handle.
1527 * @remark Should only be used when monitoring is available, thus placed in
1528 * the PGMPOOL_WITH_MONITORING #ifdef.
1529 */
1530void pgmPoolClearAll(PVM pVM)
1531{
1532 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1533 STAM_PROFILE_START(&pPool->StatClearAll, c);
1534 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1535
1536 /*
1537 * Iterate all the pages until we've encountered all that in use.
1538 * This is simple but not quite optimal solution.
1539 */
1540 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1541 unsigned cLeft = pPool->cUsedPages;
1542 unsigned iPage = pPool->cCurPages;
1543 while (--iPage >= PGMPOOL_IDX_FIRST)
1544 {
1545 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1546 if (pPage->GCPhys != NIL_RTGCPHYS)
1547 {
1548 switch (pPage->enmKind)
1549 {
1550 /*
1551 * We only care about shadow page tables.
1552 */
1553 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1554 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1555 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1556 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1557 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1558 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1559 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1560 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1561 {
1562#ifdef PGMPOOL_WITH_USER_TRACKING
1563 if (pPage->cPresent)
1564#endif
1565 {
1566 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1567 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1568 ASMMemZeroPage(pvShw);
1569 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1570#ifdef PGMPOOL_WITH_USER_TRACKING
1571 pPage->cPresent = 0;
1572 pPage->iFirstPresent = ~0;
1573#endif
1574 }
1575 }
1576 /* fall thru */
1577
1578 default:
1579 Assert(!pPage->cModifications || ++cModifiedPages);
1580 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1581 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1582 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1583 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1584 pPage->cModifications = 0;
1585 break;
1586
1587 }
1588 if (!--cLeft)
1589 break;
1590 }
1591 }
1592
1593 /* swipe the special pages too. */
1594 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1595 {
1596 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1597 if (pPage->GCPhys != NIL_RTGCPHYS)
1598 {
1599 Assert(!pPage->cModifications || ++cModifiedPages);
1600 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1601 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1602 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1603 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1604 pPage->cModifications = 0;
1605 }
1606 }
1607
1608 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1609 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1610 pPool->cModifiedPages = 0;
1611
1612#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1613 /*
1614 * Clear all the GCPhys links and rebuild the phys ext free list.
1615 */
1616 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1617 pRam;
1618 pRam = pRam->CTXSUFF(pNext))
1619 {
1620 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1621 while (iPage-- > 0)
1622 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
1623 }
1624
1625 pPool->iPhysExtFreeHead = 0;
1626 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1627 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1628 for (unsigned i = 0; i < cMaxPhysExts; i++)
1629 {
1630 paPhysExts[i].iNext = i + 1;
1631 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1632 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1633 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1634 }
1635 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1636#endif
1637
1638
1639 pPool->cPresent = 0;
1640 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1641}
1642#endif /* PGMPOOL_WITH_MONITORING */
1643
1644
1645#ifdef PGMPOOL_WITH_USER_TRACKING
1646/**
1647 * Frees up at least one user entry.
1648 *
1649 * @returns VBox status code.
1650 * @retval VINF_SUCCESS if successfully added.
1651 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1652 * @param pPool The pool.
1653 * @param iUser The user index.
1654 */
1655static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1656{
1657 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1658#ifdef PGMPOOL_WITH_CACHE
1659 /*
1660 * Just free cached pages in a braindead fashion.
1661 */
1662 /** @todo walk the age list backwards and free the first with usage. */
1663 int rc = VINF_SUCCESS;
1664 do
1665 {
1666 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1667 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1668 rc = rc2;
1669 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1670 return rc;
1671#else
1672 /*
1673 * Lazy approach.
1674 */
1675 pgmPoolFlushAllInt(pPool);
1676 return VERR_PGM_POOL_FLUSHED;
1677#endif
1678}
1679
1680
1681/**
1682 * Inserts a page into the cache.
1683 *
1684 * This will create user node for the page, insert it into the GCPhys
1685 * hash, and insert it into the age list.
1686 *
1687 * @returns VBox status code.
1688 * @retval VINF_SUCCESS if successfully added.
1689 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1690 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1691 * @param pPool The pool.
1692 * @param pPage The cached page.
1693 * @param GCPhys The GC physical address of the page we're gonna shadow.
1694 * @param iUser The user index.
1695 * @param iUserTable The user table index.
1696 */
1697DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1698{
1699 int rc = VINF_SUCCESS;
1700 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1701
1702 /*
1703 * Find free a user node.
1704 */
1705 uint16_t i = pPool->iUserFreeHead;
1706 if (i == NIL_PGMPOOL_USER_INDEX)
1707 {
1708 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1709 if (VBOX_FAILURE(rc))
1710 return rc;
1711 i = pPool->iUserFreeHead;
1712 }
1713
1714 /*
1715 * Unlink the user node from the free list,
1716 * initialize and insert it into the user list.
1717 */
1718 pPool->iUserFreeHead = pUser[i].iNext;
1719 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1720 pUser[i].iUser = iUser;
1721 pUser[i].iUserTable = iUserTable;
1722 pPage->iUserHead = i;
1723
1724 /*
1725 * Insert into cache and enable monitoring of the guest page if enabled.
1726 *
1727 * Until we implement caching of all levels, including the CR3 one, we'll
1728 * have to make sure we don't try monitor & cache any recursive reuse of
1729 * a monitored CR3 page. Because all windows versions are doing this we'll
1730 * have to be able to do combined access monitoring, CR3 + PT and
1731 * PD + PT (guest PAE).
1732 *
1733 * Update:
1734 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1735 */
1736#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1737# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1738 const bool fCanBeMonitored = true;
1739# else
1740 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1741 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1742 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1743# endif
1744# ifdef PGMPOOL_WITH_CACHE
1745 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1746# endif
1747 if (fCanBeMonitored)
1748 {
1749# ifdef PGMPOOL_WITH_MONITORING
1750 rc = pgmPoolMonitorInsert(pPool, pPage);
1751 if (rc == VERR_PGM_POOL_CLEARED)
1752 {
1753 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1754# ifndef PGMPOOL_WITH_CACHE
1755 pgmPoolMonitorFlush(pPool, pPage);
1756 rc = VERR_PGM_POOL_FLUSHED;
1757# endif
1758 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1759 pUser[i].iNext = pPool->iUserFreeHead;
1760 pUser[i].iUser = NIL_PGMPOOL_IDX;
1761 pPool->iUserFreeHead = i;
1762 }
1763 }
1764# endif
1765#endif /* PGMPOOL_WITH_MONITORING */
1766 return rc;
1767}
1768
1769
1770# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1771/**
1772 * Adds a user reference to a page.
1773 *
1774 * This will
1775 * This will move the page to the head of the
1776 *
1777 * @returns VBox status code.
1778 * @retval VINF_SUCCESS if successfully added.
1779 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1780 * @param pPool The pool.
1781 * @param pPage The cached page.
1782 * @param iUser The user index.
1783 * @param iUserTable The user table.
1784 */
1785static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1786{
1787 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1788
1789# ifdef VBOX_STRICT
1790 /*
1791 * Check that the entry doesn't already exists.
1792 */
1793 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1794 {
1795 uint16_t i = pPage->iUserHead;
1796 do
1797 {
1798 Assert(i < pPool->cMaxUsers);
1799 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1800 i = paUsers[i].iNext;
1801 } while (i != NIL_PGMPOOL_USER_INDEX);
1802 }
1803# endif
1804
1805 /*
1806 * Allocate a user node.
1807 */
1808 uint16_t i = pPool->iUserFreeHead;
1809 if (i == NIL_PGMPOOL_USER_INDEX)
1810 {
1811 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1812 if (VBOX_FAILURE(rc))
1813 return rc;
1814 i = pPool->iUserFreeHead;
1815 }
1816 pPool->iUserFreeHead = paUsers[i].iNext;
1817
1818 /*
1819 * Initialize the user node and insert it.
1820 */
1821 paUsers[i].iNext = pPage->iUserHead;
1822 paUsers[i].iUser = iUser;
1823 paUsers[i].iUserTable = iUserTable;
1824 pPage->iUserHead = i;
1825
1826# ifdef PGMPOOL_WITH_CACHE
1827 /*
1828 * Tell the cache to update its replacement stats for this page.
1829 */
1830 pgmPoolCacheUsed(pPool, pPage);
1831# endif
1832 return VINF_SUCCESS;
1833}
1834# endif /* PGMPOOL_WITH_CACHE */
1835
1836
1837/**
1838 * Frees a user record associated with a page.
1839 *
1840 * This does not clear the entry in the user table, it simply replaces the
1841 * user record to the chain of free records.
1842 *
1843 * @param pPool The pool.
1844 * @param HCPhys The HC physical address of the shadow page.
1845 * @param iUser The shadow page pool index of the user table.
1846 * @param iUserTable The index into the user table (shadowed).
1847 */
1848static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1849{
1850 /*
1851 * Unlink and free the specified user entry.
1852 */
1853 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1854
1855 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1856 uint16_t i = pPage->iUserHead;
1857 if ( i != NIL_PGMPOOL_USER_INDEX
1858 && paUsers[i].iUser == iUser
1859 && paUsers[i].iUserTable == iUserTable)
1860 {
1861 pPage->iUserHead = paUsers[i].iNext;
1862
1863 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1864 paUsers[i].iNext = pPool->iUserFreeHead;
1865 pPool->iUserFreeHead = i;
1866 return;
1867 }
1868
1869 /* General: Linear search. */
1870 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1871 while (i != NIL_PGMPOOL_USER_INDEX)
1872 {
1873 if ( paUsers[i].iUser == iUser
1874 && paUsers[i].iUserTable == iUserTable)
1875 {
1876 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1877 paUsers[iPrev].iNext = paUsers[i].iNext;
1878 else
1879 pPage->iUserHead = paUsers[i].iNext;
1880
1881 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1882 paUsers[i].iNext = pPool->iUserFreeHead;
1883 pPool->iUserFreeHead = i;
1884 return;
1885 }
1886 iPrev = i;
1887 i = paUsers[i].iNext;
1888 }
1889
1890 /* Fatal: didn't find it */
1891 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1892 iUser, iUserTable, pPage->GCPhys));
1893}
1894
1895
1896/**
1897 * Gets the entry size of a shadow table.
1898 *
1899 * @param enmKind
1900 * The kind of page.
1901 *
1902 * @returns The size of the entry in bytes. That is, 4 or 8.
1903 * @returns If the kind is not for a table, an assertion is raised and 0 is
1904 * returned.
1905 */
1906DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1907{
1908 switch (enmKind)
1909 {
1910 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1911 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1912 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1913 case PGMPOOLKIND_ROOT_32BIT_PD:
1914 return 4;
1915
1916 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1917 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1918 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1919 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1920 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1921 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1922 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1923 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1924 case PGMPOOLKIND_ROOT_PAE_PD:
1925 case PGMPOOLKIND_ROOT_PDPTR:
1926 case PGMPOOLKIND_ROOT_PML4:
1927 return 8;
1928
1929 default:
1930 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1931 }
1932}
1933
1934
1935/**
1936 * Gets the entry size of a guest table.
1937 *
1938 * @param enmKind
1939 * The kind of page.
1940 *
1941 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1942 * @returns If the kind is not for a table, an assertion is raised and 0 is
1943 * returned.
1944 */
1945DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1946{
1947 switch (enmKind)
1948 {
1949 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1950 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1951 case PGMPOOLKIND_ROOT_32BIT_PD:
1952 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1953 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1954 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1955 return 4;
1956
1957 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1958 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1959 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1960 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1961 case PGMPOOLKIND_ROOT_PAE_PD:
1962 case PGMPOOLKIND_ROOT_PDPTR:
1963 case PGMPOOLKIND_ROOT_PML4:
1964 return 8;
1965
1966 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1967 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1968 /** @todo can we return 0? (nobody is calling this...) */
1969 return 0;
1970
1971 default:
1972 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1973 }
1974}
1975
1976
1977#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1978/**
1979 * Scans one shadow page table for mappings of a physical page.
1980 *
1981 * @param pVM The VM handle.
1982 * @param pHCPhys The aHCPhys ramrange entry in question.
1983 * @param iShw The shadow page table.
1984 * @param cRefs The number of references made in that PT.
1985 */
1986static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
1987{
1988 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
1989 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1990
1991 /*
1992 * Assert sanity.
1993 */
1994 Assert(cRefs == 1);
1995 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
1996 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
1997
1998 /*
1999 * Then, clear the actual mappings to the page in the shadow PT.
2000 */
2001 switch (pPage->enmKind)
2002 {
2003 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2004 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2005 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2006 {
2007 const uint32_t u32 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2008 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2009 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2010 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2011 {
2012 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2013 pPT->a[i].u = 0;
2014 cRefs--;
2015 if (!cRefs)
2016 return;
2017 }
2018#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2019 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2020 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2021 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2022 {
2023 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2024 pPT->a[i].u = 0;
2025 }
2026#endif
2027 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2028 break;
2029 }
2030
2031 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2032 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2033 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2034 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2035 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2036 {
2037 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2038 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2039 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2040 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2041 {
2042 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2043 pPT->a[i].u = 0;
2044 cRefs--;
2045 if (!cRefs)
2046 return;
2047 }
2048#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2049 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2050 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2051 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2052 {
2053 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2054 pPT->a[i].u = 0;
2055 }
2056#endif
2057 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2058 break;
2059 }
2060
2061 default:
2062 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2063 }
2064}
2065
2066
2067/**
2068 * Scans one shadow page table for mappings of a physical page.
2069 *
2070 * @param pVM The VM handle.
2071 * @param pHCPhys The aHCPhys ramrange entry in question.
2072 * @param iShw The shadow page table.
2073 * @param cRefs The number of references made in that PT.
2074 */
2075void pgmPoolTrackFlushGCPhysPT(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
2076{
2077 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2078 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
2079 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2080 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, iShw, cRefs);
2081 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2082 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2083}
2084
2085
2086/**
2087 * Flushes a list of shadow page tables mapping the same physical page.
2088 *
2089 * @param pVM The VM handle.
2090 * @param pHCPhys The aHCPhys ramrange entry in question.
2091 * @param iPhysExt The physical cross reference extent list to flush.
2092 */
2093void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iPhysExt)
2094{
2095 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2096 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2097 LogFlow(("pgmPoolTrackFlushGCPhysPTs: pHCPhys=%p:{%RHp} iPhysExt\n", pHCPhys, *pHCPhys, iPhysExt));
2098
2099 const uint16_t iPhysExtStart = iPhysExt;
2100 PPGMPOOLPHYSEXT pPhysExt;
2101 do
2102 {
2103 Assert(iPhysExt < pPool->cMaxPhysExts);
2104 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2105 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2106 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2107 {
2108 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, pPhysExt->aidx[i], 1);
2109 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2110 }
2111
2112 /* next */
2113 iPhysExt = pPhysExt->iNext;
2114 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2115
2116 /* insert the list into the free list and clear the ram range entry. */
2117 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2118 pPool->iPhysExtFreeHead = iPhysExtStart;
2119 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2120
2121 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2122}
2123#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2124
2125
2126/**
2127 * Scans all shadow page tables for mappings of a physical page.
2128 *
2129 * This may be slow, but it's most likely more efficient than cleaning
2130 * out the entire page pool / cache.
2131 *
2132 * @returns VBox status code.
2133 * @retval VINF_SUCCESS if all references has been successfully cleared.
2134 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2135 * a page pool cleaning.
2136 *
2137 * @param pVM The VM handle.
2138 * @param pHCPhys The aHCPhys ramrange entry in question.
2139 */
2140int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PRTHCPHYS pHCPhys)
2141{
2142 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2143 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2144 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d *pHCPhys=%RHp\n",
2145 pPool->cUsedPages, pPool->cPresent, *pHCPhys));
2146
2147#if 1
2148 /*
2149 * There is a limit to what makes sense.
2150 */
2151 if (pPool->cPresent > 1024)
2152 {
2153 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2154 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2155 return VINF_PGM_GCPHYS_ALIASED;
2156 }
2157#endif
2158
2159 /*
2160 * Iterate all the pages until we've encountered all that in use.
2161 * This is simple but not quite optimal solution.
2162 */
2163 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2164 const uint32_t u32 = u64;
2165 unsigned cLeft = pPool->cUsedPages;
2166 unsigned iPage = pPool->cCurPages;
2167 while (--iPage >= PGMPOOL_IDX_FIRST)
2168 {
2169 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2170 if (pPage->GCPhys != NIL_RTGCPHYS)
2171 {
2172 switch (pPage->enmKind)
2173 {
2174 /*
2175 * We only care about shadow page tables.
2176 */
2177 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2178 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2179 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2180 {
2181 unsigned cPresent = pPage->cPresent;
2182 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2183 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2184 if (pPT->a[i].n.u1Present)
2185 {
2186 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2187 {
2188 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2189 pPT->a[i].u = 0;
2190 }
2191 if (!--cPresent)
2192 break;
2193 }
2194 break;
2195 }
2196
2197 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2198 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2199 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2200 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2201 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2202 {
2203 unsigned cPresent = pPage->cPresent;
2204 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2205 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2206 if (pPT->a[i].n.u1Present)
2207 {
2208 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2209 {
2210 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2211 pPT->a[i].u = 0;
2212 }
2213 if (!--cPresent)
2214 break;
2215 }
2216 break;
2217 }
2218 }
2219 if (!--cLeft)
2220 break;
2221 }
2222 }
2223
2224 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2225 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2226 return VINF_SUCCESS;
2227}
2228
2229
2230/**
2231 * Clears the user entry in a user table.
2232 *
2233 * This is used to remove all references to a page when flushing it.
2234 */
2235static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2236{
2237 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2238 Assert(pUser->iUser < pPool->cCurPages);
2239
2240 /*
2241 * Map the user page.
2242 */
2243 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2244 union
2245 {
2246 uint64_t *pau64;
2247 uint32_t *pau32;
2248 } u;
2249 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2250
2251#ifdef VBOX_STRICT
2252 /*
2253 * Some sanity checks.
2254 */
2255 switch (pUserPage->enmKind)
2256 {
2257 case PGMPOOLKIND_ROOT_32BIT_PD:
2258 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2259 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2260 break;
2261 case PGMPOOLKIND_ROOT_PAE_PD:
2262 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2263 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2264 break;
2265 case PGMPOOLKIND_ROOT_PDPTR:
2266 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2267 Assert(pUser->iUserTable < 4);
2268 break;
2269 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2271 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2272 break;
2273 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2274 case PGMPOOLKIND_ROOT_PML4:
2275 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2276 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2277 break;
2278 default:
2279 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2280 break;
2281 }
2282#endif /* VBOX_STRICT */
2283
2284 /*
2285 * Clear the entry in the user page.
2286 */
2287 switch (pUserPage->enmKind)
2288 {
2289 /* 32-bit entries */
2290 case PGMPOOLKIND_ROOT_32BIT_PD:
2291 u.pau32[pUser->iUserTable] = 0;
2292 break;
2293
2294 /* 64-bit entries */
2295 case PGMPOOLKIND_ROOT_PAE_PD:
2296 case PGMPOOLKIND_ROOT_PDPTR:
2297 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2298 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2299 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2300 case PGMPOOLKIND_ROOT_PML4:
2301 u.pau64[pUser->iUserTable] = 0;
2302 break;
2303
2304 default:
2305 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2306 }
2307}
2308
2309
2310/**
2311 * Clears all users of a page.
2312 */
2313static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2314{
2315 /*
2316 * Free all the user records.
2317 */
2318 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2319 uint16_t i = pPage->iUserHead;
2320 while (i != NIL_PGMPOOL_USER_INDEX)
2321 {
2322 /* Clear enter in user table. */
2323 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2324
2325 /* Free it. */
2326 const uint16_t iNext = paUsers[i].iNext;
2327 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2328 paUsers[i].iNext = pPool->iUserFreeHead;
2329 pPool->iUserFreeHead = i;
2330
2331 /* Next. */
2332 i = iNext;
2333 }
2334 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2335}
2336
2337
2338#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2339/**
2340 * Allocates a new physical cross reference extent.
2341 *
2342 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2343 * @param pVM The VM handle.
2344 * @param piPhysExt Where to store the phys ext index.
2345 */
2346PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2347{
2348 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2349 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2350 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2351 {
2352 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2353 return NULL;
2354 }
2355 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2356 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2357 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2358 *piPhysExt = iPhysExt;
2359 return pPhysExt;
2360}
2361
2362
2363/**
2364 * Frees a physical cross reference extent.
2365 *
2366 * @param pVM The VM handle.
2367 * @param iPhysExt The extent to free.
2368 */
2369void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2370{
2371 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2372 Assert(iPhysExt < pPool->cMaxPhysExts);
2373 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2374 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2375 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2376 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2377 pPool->iPhysExtFreeHead = iPhysExt;
2378}
2379
2380
2381/**
2382 * Frees a physical cross reference extent.
2383 *
2384 * @param pVM The VM handle.
2385 * @param iPhysExt The extent to free.
2386 */
2387void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2388{
2389 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2390
2391 const uint16_t iPhysExtStart = iPhysExt;
2392 PPGMPOOLPHYSEXT pPhysExt;
2393 do
2394 {
2395 Assert(iPhysExt < pPool->cMaxPhysExts);
2396 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2397 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2398 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2399
2400 /* next */
2401 iPhysExt = pPhysExt->iNext;
2402 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2403
2404 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2405 pPool->iPhysExtFreeHead = iPhysExtStart;
2406}
2407
2408/**
2409 * Insert a reference into a list of physical cross reference extents.
2410 *
2411 * @returns The new ram range flags (top 16-bits).
2412 *
2413 * @param pVM The VM handle.
2414 * @param iPhysExt The physical extent index of the list head.
2415 * @param iShwPT The shadow page table index.
2416 *
2417 */
2418static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2419{
2420 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2421 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2422
2423 /* special common case. */
2424 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2425 {
2426 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2427 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2428 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2429 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2430 }
2431
2432 /* general treatment. */
2433 const uint16_t iPhysExtStart = iPhysExt;
2434 unsigned cMax = 15;
2435 for (;;)
2436 {
2437 Assert(iPhysExt < pPool->cMaxPhysExts);
2438 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2439 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2440 {
2441 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2442 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2443 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2444 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2445 }
2446 if (!--cMax)
2447 {
2448 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2449 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2450 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2451 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2452 }
2453 }
2454
2455 /* add another extent to the list. */
2456 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2457 if (!pNew)
2458 {
2459 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2460 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2461 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2462 }
2463 pNew->iNext = iPhysExtStart;
2464 pNew->aidx[0] = iShwPT;
2465 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2466 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2467}
2468
2469
2470/**
2471 * Add a reference to guest physical page where extents are in use.
2472 *
2473 * @returns The new ram range flags (top 16-bits).
2474 *
2475 * @param pVM The VM handle.
2476 * @param u16 The ram range flags (top 16-bits).
2477 * @param iShwPT The shadow page table index.
2478 */
2479uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2480{
2481 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2482 {
2483 /*
2484 * Convert to extent list.
2485 */
2486 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2487 uint16_t iPhysExt;
2488 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2489 if (pPhysExt)
2490 {
2491 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2492 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2493 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2494 pPhysExt->aidx[1] = iShwPT;
2495 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2496 }
2497 else
2498 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2499 }
2500 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2501 {
2502 /*
2503 * Insert into the extent list.
2504 */
2505 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2506 }
2507 else
2508 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2509 return u16;
2510}
2511
2512
2513/**
2514 * Clear references to guest physical memory.
2515 *
2516 * @param pPool The pool.
2517 * @param pPage The page.
2518 * @param pHCPhys Pointer to the aHCPhys entry in the ram range.
2519 */
2520void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PRTHCPHYS pHCPhys)
2521{
2522 const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
2523 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2524
2525 uint16_t iPhysExt = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2526 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2527 {
2528 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2529 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2530 do
2531 {
2532 Assert(iPhysExt < pPool->cMaxPhysExts);
2533
2534 /*
2535 * Look for the shadow page and check if it's all freed.
2536 */
2537 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2538 {
2539 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2540 {
2541 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2542
2543 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2544 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2545 {
2546 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2547 return;
2548 }
2549
2550 /* we can free the node. */
2551 PVM pVM = pPool->CTXSUFF(pVM);
2552 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2553 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2554 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2555 {
2556 /* lonely node */
2557 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2558 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d lonely\n", *pHCPhys, pPage->idx));
2559 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2560 }
2561 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2562 {
2563 /* head */
2564 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d head\n", *pHCPhys, pPage->idx));
2565 *pHCPhys = (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK)
2566 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2567 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2568 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2569 }
2570 else
2571 {
2572 /* in list */
2573 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2574 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2575 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2576 }
2577 iPhysExt = iPhysExtNext;
2578 return;
2579 }
2580 }
2581
2582 /* next */
2583 iPhysExtPrev = iPhysExt;
2584 iPhysExt = paPhysExts[iPhysExt].iNext;
2585 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2586
2587 AssertFatalMsgFailed(("not-found! cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2588 }
2589 else /* nothing to do */
2590 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64\n", *pHCPhys));
2591}
2592
2593
2594
2595/**
2596 * Clear references to guest physical memory.
2597 *
2598 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2599 * is assumed to be correct, so the linear search can be skipped and we can assert
2600 * at an earlier point.
2601 *
2602 * @param pPool The pool.
2603 * @param pPage The page.
2604 * @param HCPhys The host physical address corresponding to the guest page.
2605 * @param GCPhys The guest physical address corresponding to HCPhys.
2606 */
2607static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2608{
2609 /*
2610 * Walk range list.
2611 */
2612 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2613 while (pRam)
2614 {
2615 RTGCPHYS off = GCPhys - pRam->GCPhys;
2616 if (off < pRam->cb)
2617 {
2618 /* does it match? */
2619 const unsigned iPage = off >> PAGE_SHIFT;
2620 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2621 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2622 {
2623 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2624 return;
2625 }
2626 break;
2627 }
2628 pRam = CTXSUFF(pRam->pNext);
2629 }
2630 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2631}
2632
2633
2634/**
2635 * Clear references to guest physical memory.
2636 *
2637 * @param pPool The pool.
2638 * @param pPage The page.
2639 * @param HCPhys The host physical address corresponding to the guest page.
2640 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
2641 */
2642static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2643{
2644 /*
2645 * Walk range list.
2646 */
2647 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2648 while (pRam)
2649 {
2650 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2651 if (off < pRam->cb)
2652 {
2653 /* does it match? */
2654 const unsigned iPage = off >> PAGE_SHIFT;
2655 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2656 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2657 {
2658 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2659 return;
2660 }
2661 break;
2662 }
2663 pRam = CTXSUFF(pRam->pNext);
2664 }
2665
2666 /*
2667 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2668 */
2669 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2670 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2671 while (pRam)
2672 {
2673 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2674 while (iPage-- > 0)
2675 {
2676 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2677 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2678 {
2679 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2680 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2681 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2682 return;
2683 }
2684 }
2685 pRam = CTXSUFF(pRam->pNext);
2686 }
2687
2688 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2689}
2690
2691
2692/**
2693 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2694 *
2695 * @param pPool The pool.
2696 * @param pPage The page.
2697 * @param pShwPT The shadow page table (mapping of the page).
2698 * @param pGstPT The guest page table.
2699 */
2700DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2701{
2702 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2703 if (pShwPT->a[i].n.u1Present)
2704 {
2705 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2706 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2707 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2708 if (!--pPage->cPresent)
2709 break;
2710 }
2711}
2712
2713
2714/**
2715 * Clear references to guest physical memory in a PAE / 32-bit page table.
2716 *
2717 * @param pPool The pool.
2718 * @param pPage The page.
2719 * @param pShwPT The shadow page table (mapping of the page).
2720 * @param pGstPT The guest page table (just a half one).
2721 */
2722DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2723{
2724 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2725 if (pShwPT->a[i].n.u1Present)
2726 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2727}
2728
2729
2730/**
2731 * Clear references to guest physical memory in a PAE / PAE page table.
2732 *
2733 * @param pPool The pool.
2734 * @param pPage The page.
2735 * @param pShwPT The shadow page table (mapping of the page).
2736 * @param pGstPT The guest page table.
2737 */
2738DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2739{
2740 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2741 if (pShwPT->a[i].n.u1Present)
2742 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2743}
2744
2745
2746/**
2747 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2748 *
2749 * @param pPool The pool.
2750 * @param pPage The page.
2751 * @param pShwPT The shadow page table (mapping of the page).
2752 */
2753DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2754{
2755 RTGCPHYS GCPhys = pPage->GCPhys;
2756 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2757 if (pShwPT->a[i].n.u1Present)
2758 {
2759 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2760 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2761 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2762 }
2763}
2764
2765
2766/**
2767 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2768 *
2769 * @param pPool The pool.
2770 * @param pPage The page.
2771 * @param pShwPT The shadow page table (mapping of the page).
2772 */
2773DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2774{
2775 RTGCPHYS GCPhys = pPage->GCPhys;
2776 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2777 if (pShwPT->a[i].n.u1Present)
2778 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2779}
2780#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2781
2782/**
2783 * Clear references to shadowed pages in a PAE page directory.
2784 *
2785 * @param pPool The pool.
2786 * @param pPage The page.
2787 * @param pShwPD The shadow page directory (mapping of the page).
2788 */
2789DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2790{
2791 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2792 {
2793 if (pShwPD->a[i].n.u1Present)
2794 {
2795 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2796 if (pSubPage)
2797 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2798 else
2799 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2800 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2801 }
2802 }
2803}
2804
2805
2806/**
2807 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2808 *
2809 * @param pPool The pool.
2810 * @param pPage The page.
2811 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2812 */
2813DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2814{
2815 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2816 {
2817 if (pShwPdPtr->a[i].n.u1Present)
2818 {
2819 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2820 if (pSubPage)
2821 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2822 else
2823 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2824 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2825 }
2826 }
2827}
2828
2829
2830/**
2831 * Clears all references made by this page.
2832 *
2833 * This includes other shadow pages and GC physical addresses.
2834 *
2835 * @param pPool The pool.
2836 * @param pPage The page.
2837 */
2838static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2839{
2840 /*
2841 * Map the shadow page and take action according to the page kind.
2842 */
2843 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2844 switch (pPage->enmKind)
2845 {
2846#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2847 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2848 {
2849 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2850 void *pvGst;
2851 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2852 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2853 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2854 break;
2855 }
2856
2857 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2858 {
2859 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2860 void *pvGst;
2861 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2862 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2863 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2864 break;
2865 }
2866
2867 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2868 {
2869 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2870 void *pvGst;
2871 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2872 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2873 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2874 break;
2875 }
2876
2877 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2879 {
2880 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2881 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2882 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2883 break;
2884 }
2885
2886 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2887 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2888 {
2889 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2890 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2891 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2892 break;
2893 }
2894
2895#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2896 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2897 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2898 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2899 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2900 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2901 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2902 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2903 break;
2904#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2905
2906 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2907 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2908 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2909 break;
2910
2911 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2912 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2913 break;
2914
2915 default:
2916 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2917 }
2918
2919 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
2920 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2921 ASMMemZeroPage(pvShw);
2922 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2923 pPage->fZeroed = true;
2924}
2925#endif /* PGMPOOL_WITH_USER_TRACKING */
2926
2927
2928/**
2929 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2930 *
2931 * @param pPool The pool.
2932 */
2933static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2934{
2935 /*
2936 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2937 */
2938 Assert(NIL_PGMPOOL_IDX == 0);
2939 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2940 {
2941 /*
2942 * Get the page address.
2943 */
2944 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2945 union
2946 {
2947 uint64_t *pau64;
2948 uint32_t *pau32;
2949 } u;
2950 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2951
2952 /*
2953 * Mark stuff not present.
2954 */
2955 switch (pPage->enmKind)
2956 {
2957 case PGMPOOLKIND_ROOT_32BIT_PD:
2958 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2959 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2960 u.pau32[iPage] = 0;
2961 break;
2962
2963 case PGMPOOLKIND_ROOT_PAE_PD:
2964 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2965 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2966 u.pau64[iPage] = 0;
2967 break;
2968
2969 case PGMPOOLKIND_ROOT_PML4:
2970 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2971 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2972 u.pau64[iPage] = 0;
2973 break;
2974
2975 case PGMPOOLKIND_ROOT_PDPTR:
2976 /* Not root of shadowed pages currently, ignore it. */
2977 break;
2978 }
2979 }
2980
2981 /*
2982 * Paranoia (to be removed), flag a global CR3 sync.
2983 */
2984 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
2985}
2986
2987
2988/**
2989 * Flushes the entire cache.
2990 *
2991 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
2992 * and execute this CR3 flush.
2993 *
2994 * @param pPool The pool.
2995 */
2996static void pgmPoolFlushAllInt(PPGMPOOL pPool)
2997{
2998 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
2999 LogFlow(("pgmPoolFlushAllInt:\n"));
3000
3001 /*
3002 * If there are no pages in the pool, there is nothing to do.
3003 */
3004 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3005 {
3006 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3007 return;
3008 }
3009
3010 /*
3011 * Nuke the free list and reinsert all pages into it.
3012 */
3013 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3014 {
3015 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3016
3017#ifdef IN_RING3
3018 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3019#endif
3020#ifdef PGMPOOL_WITH_MONITORING
3021 if (pPage->fMonitored)
3022 pgmPoolMonitorFlush(pPool, pPage);
3023 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3024 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3025 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3026 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3027 pPage->cModifications = 0;
3028#endif
3029 pPage->GCPhys = NIL_RTGCPHYS;
3030 pPage->enmKind = PGMPOOLKIND_FREE;
3031 Assert(pPage->idx == i);
3032 pPage->iNext = i + 1;
3033 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3034 pPage->fSeenNonGlobal = false;
3035 pPage->fMonitored= false;
3036 pPage->fCached = false;
3037 pPage->fReusedFlushPending = false;
3038 pPage->fCR3Mix = false;
3039#ifdef PGMPOOL_WITH_USER_TRACKING
3040 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3041#endif
3042#ifdef PGMPOOL_WITH_CACHE
3043 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3044 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3045#endif
3046 }
3047 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3048 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3049 pPool->cUsedPages = 0;
3050
3051#ifdef PGMPOOL_WITH_USER_TRACKING
3052 /*
3053 * Zap and reinitialize the user records.
3054 */
3055 pPool->cPresent = 0;
3056 pPool->iUserFreeHead = 0;
3057 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3058 const unsigned cMaxUsers = pPool->cMaxUsers;
3059 for (unsigned i = 0; i < cMaxUsers; i++)
3060 {
3061 paUsers[i].iNext = i + 1;
3062 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3063 paUsers[i].iUserTable = 0xfffe;
3064 }
3065 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3066#endif
3067
3068#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3069 /*
3070 * Clear all the GCPhys links and rebuild the phys ext free list.
3071 */
3072 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3073 pRam;
3074 pRam = pRam->CTXSUFF(pNext))
3075 {
3076 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3077 while (iPage-- > 0)
3078 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
3079 }
3080
3081 pPool->iPhysExtFreeHead = 0;
3082 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3083 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3084 for (unsigned i = 0; i < cMaxPhysExts; i++)
3085 {
3086 paPhysExts[i].iNext = i + 1;
3087 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3088 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3089 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3090 }
3091 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3092#endif
3093
3094#ifdef PGMPOOL_WITH_MONITORING
3095 /*
3096 * Just zap the modified list.
3097 */
3098 pPool->cModifiedPages = 0;
3099 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3100#endif
3101
3102#ifdef PGMPOOL_WITH_CACHE
3103 /*
3104 * Clear the GCPhys hash and the age list.
3105 */
3106 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3107 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3108 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3109 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3110#endif
3111
3112 /*
3113 * Flush all the special root pages.
3114 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3115 */
3116 pgmPoolFlushAllSpecialRoots(pPool);
3117 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3118 {
3119 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3120 pPage->iNext = NIL_PGMPOOL_IDX;
3121#ifdef PGMPOOL_WITH_MONITORING
3122 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3123 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3124 pPage->cModifications = 0;
3125 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3126 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3127 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3128 if (pPage->fMonitored)
3129 {
3130 PVM pVM = pPool->CTXSUFF(pVM);
3131 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3132 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
3133 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
3134 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
3135 pPool->pszAccessHandler);
3136 AssertFatalRCSuccess(rc);
3137# ifdef PGMPOOL_WITH_CACHE
3138 pgmPoolHashInsert(pPool, pPage);
3139# endif
3140 }
3141#endif
3142#ifdef PGMPOOL_WITH_USER_TRACKING
3143 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3144#endif
3145#ifdef PGMPOOL_WITH_CACHE
3146 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3147 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3148#endif
3149 }
3150
3151 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3152}
3153
3154
3155/**
3156 * Flushes a pool page.
3157 *
3158 * This moves the page to the free list after removing all user references to it.
3159 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3160 *
3161 * @returns VBox status code.
3162 * @retval VINF_SUCCESS on success.
3163 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3164 * @param pPool The pool.
3165 * @param HCPhys The HC physical address of the shadow page.
3166 */
3167int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3168{
3169 int rc = VINF_SUCCESS;
3170 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3171 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3172 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3173
3174 /*
3175 * Quietly reject any attempts at flushing any of the special root pages.
3176 */
3177 if (pPage->idx < PGMPOOL_IDX_FIRST)
3178 {
3179 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3180 return VINF_SUCCESS;
3181 }
3182
3183 /*
3184 * Mark the page as being in need of a ASMMemZeroPage().
3185 */
3186 pPage->fZeroed = false;
3187
3188#ifdef PGMPOOL_WITH_USER_TRACKING
3189 /*
3190 * Clear the page.
3191 */
3192 pgmPoolTrackClearPageUsers(pPool, pPage);
3193 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3194 pgmPoolTrackDeref(pPool, pPage);
3195 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3196#endif
3197
3198#ifdef PGMPOOL_WITH_CACHE
3199 /*
3200 * Flush it from the cache.
3201 */
3202 pgmPoolCacheFlushPage(pPool, pPage);
3203#endif /* PGMPOOL_WITH_CACHE */
3204
3205#ifdef PGMPOOL_WITH_MONITORING
3206 /*
3207 * Deregistering the monitoring.
3208 */
3209 if (pPage->fMonitored)
3210 rc = pgmPoolMonitorFlush(pPool, pPage);
3211#endif
3212
3213 /*
3214 * Free the page.
3215 */
3216 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3217 pPage->iNext = pPool->iFreeHead;
3218 pPool->iFreeHead = pPage->idx;
3219 pPage->enmKind = PGMPOOLKIND_FREE;
3220 pPage->GCPhys = NIL_RTGCPHYS;
3221 pPage->fReusedFlushPending = false;
3222
3223 pPool->cUsedPages--;
3224 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3225 return rc;
3226}
3227
3228
3229/**
3230 * Frees a usage of a pool page.
3231 *
3232 * The caller is responsible to updating the user table so that it no longer
3233 * references the shadow page.
3234 *
3235 * @param pPool The pool.
3236 * @param HCPhys The HC physical address of the shadow page.
3237 * @param iUser The shadow page pool index of the user table.
3238 * @param iUserTable The index into the user table (shadowed).
3239 */
3240void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3241{
3242 STAM_PROFILE_START(&pPool->StatFree, a);
3243 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3244 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3245 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3246#ifdef PGMPOOL_WITH_USER_TRACKING
3247 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3248#endif
3249#ifdef PGMPOOL_WITH_CACHE
3250 if (!pPage->fCached)
3251#endif
3252 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3253 STAM_PROFILE_STOP(&pPool->StatFree, a);
3254}
3255
3256
3257/**
3258 * Makes one or more free page free.
3259 *
3260 * @returns VBox status code.
3261 * @retval VINF_SUCCESS on success.
3262 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3263 *
3264 * @param pPool The pool.
3265 * @param iUser The user of the page.
3266 */
3267static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3268{
3269 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3270
3271 /*
3272 * If the pool isn't full grown yet, expand it.
3273 */
3274 if (pPool->cCurPages < pPool->cMaxPages)
3275 {
3276 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3277#ifdef IN_RING3
3278 int rc = PGMR3PoolGrow(pPool->pVMHC);
3279#else
3280 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3281#endif
3282 if (VBOX_FAILURE(rc))
3283 return rc;
3284 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3285 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3286 return VINF_SUCCESS;
3287 }
3288
3289#ifdef PGMPOOL_WITH_CACHE
3290 /*
3291 * Free one cached page.
3292 */
3293 return pgmPoolCacheFreeOne(pPool, iUser);
3294#else
3295 /*
3296 * Flush the pool.
3297 * If we have tracking enabled, it should be possible to come up with
3298 * a cheap replacement strategy...
3299 */
3300 pgmPoolFlushAllInt(pPool);
3301 return VERR_PGM_POOL_FLUSHED;
3302#endif
3303}
3304
3305
3306/**
3307 * Allocates a page from the pool.
3308 *
3309 * This page may actually be a cached page and not in need of any processing
3310 * on the callers part.
3311 *
3312 * @returns VBox status code.
3313 * @retval VINF_SUCCESS if a NEW page was allocated.
3314 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3315 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3316 * @param pVM The VM handle.
3317 * @param GCPhys The GC physical address of the page we're gonna shadow.
3318 * For 4MB and 2MB PD entries, it's the first address the
3319 * shadow PT is covering.
3320 * @param enmKind The kind of mapping.
3321 * @param iUser The shadow page pool index of the user table.
3322 * @param iUserTable The index into the user table (shadowed).
3323 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3324 */
3325int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3326{
3327 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3328 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3329 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3330
3331 *ppPage = NULL;
3332
3333#ifdef PGMPOOL_WITH_CACHE
3334 if (pPool->fCacheEnabled)
3335 {
3336 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3337 if (VBOX_SUCCESS(rc2))
3338 {
3339 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3340 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3341 return rc2;
3342 }
3343 }
3344#endif
3345
3346 /*
3347 * Allocate a new one.
3348 */
3349 int rc = VINF_SUCCESS;
3350 uint16_t iNew = pPool->iFreeHead;
3351 if (iNew == NIL_PGMPOOL_IDX)
3352 {
3353 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3354 if (VBOX_FAILURE(rc))
3355 {
3356 if (rc != VERR_PGM_POOL_CLEARED)
3357 {
3358 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3359 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3360 return rc;
3361 }
3362 rc = VERR_PGM_POOL_FLUSHED;
3363 }
3364 iNew = pPool->iFreeHead;
3365 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3366 }
3367
3368 /* unlink the free head */
3369 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3370 pPool->iFreeHead = pPage->iNext;
3371 pPage->iNext = NIL_PGMPOOL_IDX;
3372
3373 /*
3374 * Initialize it.
3375 */
3376 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3377 pPage->enmKind = enmKind;
3378 pPage->GCPhys = GCPhys;
3379 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3380 pPage->fMonitored = false;
3381 pPage->fCached = false;
3382 pPage->fReusedFlushPending = false;
3383 pPage->fCR3Mix = false;
3384#ifdef PGMPOOL_WITH_MONITORING
3385 pPage->cModifications = 0;
3386 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3387 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3388#endif
3389#ifdef PGMPOOL_WITH_USER_TRACKING
3390 pPage->cPresent = 0;
3391 pPage->iFirstPresent = ~0;
3392
3393 /*
3394 * Insert into the tracking and cache. If this fails, free the page.
3395 */
3396 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3397 if (VBOX_FAILURE(rc3))
3398 {
3399 if (rc3 != VERR_PGM_POOL_CLEARED)
3400 {
3401 pPool->cUsedPages--;
3402 pPage->enmKind = PGMPOOLKIND_FREE;
3403 pPage->GCPhys = NIL_RTGCPHYS;
3404 pPage->iNext = pPool->iFreeHead;
3405 pPool->iFreeHead = pPage->idx;
3406 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3407 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3408 return rc3;
3409 }
3410 rc = VERR_PGM_POOL_FLUSHED;
3411 }
3412#endif /* PGMPOOL_WITH_USER_TRACKING */
3413
3414 /*
3415 * Commit the allocation, clear the page and return.
3416 */
3417#ifdef VBOX_WITH_STATISTICS
3418 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3419 pPool->cUsedPagesHigh = pPool->cUsedPages;
3420#endif
3421
3422 if (!pPage->fZeroed)
3423 {
3424 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3425 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3426 ASMMemZeroPage(pv);
3427 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3428 }
3429
3430 *ppPage = pPage;
3431 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3432 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3433 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3434 return rc;
3435}
3436
3437
3438/**
3439 * Frees a usage of a pool page.
3440 *
3441 * @param pVM The VM handle.
3442 * @param HCPhys The HC physical address of the shadow page.
3443 * @param iUser The shadow page pool index of the user table.
3444 * @param iUserTable The index into the user table (shadowed).
3445 */
3446void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3447{
3448 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3449 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3450 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3451}
3452
3453
3454/**
3455 * Gets a in-use page in the pool by it's physical address.
3456 *
3457 * @returns Pointer to the page.
3458 * @param pVM The VM handle.
3459 * @param HCPhys The HC physical address of the shadow page.
3460 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3461 */
3462PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3463{
3464 /** @todo profile this! */
3465 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3466 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3467 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3468 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3469 return pPage;
3470}
3471
3472
3473/**
3474 * Flushes the entire cache.
3475 *
3476 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3477 * and execute this CR3 flush.
3478 *
3479 * @param pPool The pool.
3480 */
3481void pgmPoolFlushAll(PVM pVM)
3482{
3483 LogFlow(("pgmPoolFlushAll:\n"));
3484 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3485}
3486
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette