VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 1828

Last change on this file since 1828 was 1828, checked in by vboxsync, 18 years ago

Cleaned up cpl checking.

  • Property svn:keywords set to Id
File size: 117.5 KB
Line 
1/* $Id: PGMAllPool.cpp 1828 2007-03-30 12:52:55Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 return pVM->pgm.s.apGCPaePDs[0];
115 case PGMPOOL_IDX_PDPTR:
116 return pVM->pgm.s.pGCPaePDPTR;
117 case PGMPOOL_IDX_PML4:
118 return pVM->pgm.s.pGCPaePML4;
119 default:
120 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
121 return NULL;
122 }
123}
124#endif /* IN_GC */
125
126
127#ifdef PGMPOOL_WITH_MONITORING
128/**
129 * Determine the size of a write instruction.
130 * @returns number of bytes written.
131 * @param pDis The disassembler state.
132 */
133static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
134{
135 /*
136 * This is very crude and possibly wrong for some opcodes,
137 * but since it's not really supposed to be called we can
138 * probably live with that.
139 */
140 return DISGetParamSize(pDis, &pDis->param1);
141}
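/*
 * A minimal usage sketch (illustrative only, not part of the build; the
 * helper name below is hypothetical): for a write such as 'mov dword [ebx],
 * eax' the disassembler records a 32-bit first parameter, so the function
 * above returns 4.
 */
#if 0
static void pgmPoolDisasWriteSizeExample(PDISCPUSTATE pCpu)
{
    unsigned cbWrite = pgmPoolDisasWriteSize(pCpu); /* 4 for a dword store. */
    Assert(cbWrite >= 1 && cbWrite <= 8);
}
#endif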
142
143
144/**
145 * Flushes a chain of pages sharing the same access monitor.
146 *
147 * @returns VBox status code suitable for scheduling.
148 * @param pPool The pool.
149 * @param pPage A page in the chain.
150 */
151int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
152{
153 /*
154 * Find the list head.
155 */
156 uint16_t idx = pPage->idx;
157 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
160 {
161 idx = pPage->iMonitoredPrev;
162 Assert(idx != pPage->idx);
163 pPage = &pPool->aPages[idx];
164 }
165 }
166
167 /*
168 * Iterate the list, flushing each shadow page.
169 */
170 int rc = VINF_SUCCESS;
171 for (;;)
172 {
173 idx = pPage->iMonitoredNext;
174 Assert(idx != pPage->idx);
175 if (pPage->idx >= PGMPOOL_IDX_FIRST)
176 {
177 int rc2 = pgmPoolFlushPage(pPool, pPage);
178 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
179 rc = VINF_PGM_SYNC_CR3;
180 }
181 /* next */
182 if (idx == NIL_PGMPOOL_IDX)
183 break;
184 pPage = &pPool->aPages[idx];
185 }
186 return rc;
187}
188
189
190/**
191 * Wrapper for getting the current context pointer to the entry being modified.
192 *
193 * @returns Pointer to the current context mapping of the entry.
194 * @param pPool The pool.
195 * @param pvFault The fault virtual address.
196 * @param GCPhysFault The fault physical address.
197 * @param cbEntry The entry size.
198 */
199#ifdef IN_RING3
200DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
201#else
202DECLINLINE(void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
203#endif
204{
205#ifdef IN_GC
206 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
207
208#elif defined(IN_RING0)
209 void *pvRet;
210 int rc = PGMRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
211 AssertFatalRCSuccess(rc);
212 return pvRet;
213
214#elif defined(IN_RING3)
215 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
216#else
217# error "huh?"
218#endif
219}
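/*
 * Illustrative sketch (not part of the build; the values and helper name are
 * made up): the rounding above relies on cbEntry being a power of two, which
 * holds for both 4 byte legacy and 8 byte PAE page table entries.
 */
#if 0
static void pgmPoolMonitorAlignExample(void)
{
    RTGCUINTPTR uFault  = 0x80301234;          /* hypothetical fault address. */
    unsigned    cbEntry = sizeof(X86PTEPAE);   /* 8 bytes for a PAE PTE. */
    RTGCUINTPTR uEntry  = uFault & ~(RTGCUINTPTR)(cbEntry - 1);
    Assert(uEntry == 0x80301230);              /* rounded down to the entry start. */
}
#endif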
220
221
222/**
223 * Process shadow entries before they are changed by the guest.
224 *
225 * For PT entries we will clear them. For PD entries, we'll simply check
226 * for mapping conflicts and set the SyncCR3 FF if found.
227 *
228 * @param pPool The pool.
229 * @param pPage The head page.
230 * @param GCPhysFault The guest physical fault address.
231 * @param uAddress In R0 and GC this is the guest context fault address (flat).
232 * In R3 this is the host context 'fault' address.
233 * @param pCpu The disassembler state for figuring out the write size.
234 * This need not be specified if the caller knows we won't do cross entry accesses.
235 */
236#ifdef IN_RING3
237void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
238#else
239void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
240#endif
241{
242 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
243 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
244 for (;;)
245 {
246 union
247 {
248 void *pv;
249 PX86PT pPT;
250 PX86PTPAE pPTPae;
251 PX86PD pPD;
252 PX86PDPAE pPDPae;
253 } uShw;
254 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
255
256 switch (pPage->enmKind)
257 {
258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
259 {
260 const unsigned iShw = off / sizeof(X86PTE);
261 if (uShw.pPT->a[iShw].n.u1Present)
262 {
263# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
264 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
265 pgmPoolTracDerefGCPhysHint(pPool, pPage,
266 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
267 pGstPte->u & X86_PTE_PG_MASK);
268# endif
269 uShw.pPT->a[iShw].u = 0;
270 }
271 break;
272 }
273
274 /* page/2 sized */
275 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 if (uShw.pPTPae->a[iShw].n.u1Present)
280 {
281# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
282 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
283 pgmPoolTracDerefGCPhysHint(pPool, pPage,
284 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
285 pGstPte->u & X86_PTE_PG_MASK);
286# endif
287 uShw.pPTPae->a[iShw].u = 0;
288 }
289 }
290 break;
291
292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
293 {
294 const unsigned iShw = off / sizeof(X86PTEPAE);
295 if (uShw.pPTPae->a[iShw].n.u1Present)
296 {
297# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
298 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
299 pgmPoolTracDerefGCPhysHint(pPool, pPage,
300 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
301 pGstPte->u & X86_PTE_PAE_PG_MASK);
302# endif
303 uShw.pPTPae->a[iShw].u = 0;
304 }
305 break;
306 }
307
308 case PGMPOOLKIND_ROOT_32BIT_PD:
309 {
310 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
311 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
312 {
313 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
314 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
316 }
317 /* paranoia / a bit assumptive. */
318 else if ( pCpu
319 && (off & 4)
320 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
321 {
322 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
323 if ( iShw2 != iShw
324 && iShw2 < ELEMENTS(uShw.pPD->a)
325 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
326 {
327 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
328 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
329 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
330 }
331 }
332#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
333 if ( uShw.pPD->a[iShw].n.u1Present
334 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
335 {
336 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
337# ifdef IN_GC /* TLB load - we're pushing things a bit... */
338 ASMProbeReadByte(pvAddress);
339# endif
340 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
341 uShw.pPD->a[iShw].u = 0;
342 }
343#endif
344 break;
345 }
346
347 case PGMPOOLKIND_ROOT_PAE_PD:
348 {
349 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
350 for (unsigned i = 0; i < 2; i++, iShw++)
351 {
352 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
353 {
354 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
355 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
356 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
357 }
358 /* paranoia / a bit assumptive. */
359 else if ( pCpu
360 && (off & 4)
361 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
362 {
363 const unsigned iShw2 = iShw + 2;
364 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
365 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
366 {
367 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
368 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
369 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
370 }
371 }
372#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
373 if ( uShw.pPDPae->a[iShw].n.u1Present
374 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
375 {
376 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
377# ifdef IN_GC /* TLB load - we're pushing things a bit... */
378 ASMProbeReadByte(pvAddress);
379# endif
380 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
381 uShw.pPDPae->a[iShw].u = 0;
382 }
383#endif
384 }
385 break;
386 }
387
388 default:
389 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
390 }
391
392 /* next */
393 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
394 return;
395 pPage = &pPool->aPages[pPage->iMonitoredNext];
396 }
397}
398
399
400# ifndef IN_RING3
401/**
402 * Checks if an access could be a fork operation in progress.
403 *
404 * Meaning that the guest is setting up the parent process for copy-on-write.
405 *
406 * @returns true if it's likely that we're forking, otherwise false.
407 * @param pPool The pool.
408 * @param pCpu The disassembled instruction.
409 * @param offFault The access offset.
410 */
411DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
412{
413 /*
414 * i386 linux is using btr to clear X86_PTE_RW.
415 * The functions involved are (2.6.16 source inspection):
416 * clear_bit
417 * ptep_set_wrprotect
418 * copy_one_pte
419 * copy_pte_range
420 * copy_pmd_range
421 * copy_pud_range
422 * copy_page_range
423 * dup_mmap
424 * dup_mm
425 * copy_mm
426 * copy_process
427 * do_fork
428 */
429 if ( pCpu->pCurInstr->opcode == OP_BTR
430 && !(offFault & 4)
431 /** @todo Validate that the bit index is X86_PTE_RW. */
432 )
433 {
434 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
435 return true;
436 }
437 return false;
438}
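/*
 * Background sketch (illustrative only; the helper name and PTE value are
 * made up): the R/W bit is bit 1 of a PTE, so the btr Linux uses lands in
 * the low dword of the (possibly 8 byte) entry - which is why the check
 * above insists on !(offFault & 4).
 */
#if 0
static void pgmPoolForkWrProtectExample(void)
{
    X86PTEPAE Pte;
    Pte.u = X86_PTE_P | X86_PTE_RW | X86_PTE_US; /* present, writable PTE. */
    Pte.u &= ~(uint64_t)X86_PTE_RW;              /* what ptep_set_wrprotect boils down to. */
    Assert(!Pte.n.u1Write);
}
#endif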
439
440
441/**
442 * Determine whether the page is likely to have been reused.
443 *
444 * @returns true if we consider the page as being reused for a different purpose.
445 * @returns false if we consider it to still be a paging page.
446 * @param pPage The page in question.
447 * @param pCpu The disassembly info for the faulting instruction.
448 * @param pvFault The fault address.
449 *
450 * @remark The REP prefix check is left to the caller because of STOSD/W.
451 */
452DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
453{
454 switch (pCpu->pCurInstr->opcode)
455 {
456 case OP_PUSH:
457 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
458 return true;
459 case OP_PUSHF:
460 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
461 return true;
462 case OP_PUSHA:
463 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
464 return true;
465 case OP_FXSAVE:
466 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
467 return true;
468 }
469 if ( (pCpu->param1.flags & USE_REG_GEN32)
470 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
471 {
472 Log4(("pgmPoolMonitorIsReused: ESP\n"));
473 return true;
474 }
475
476 //if (pPage->fCR3Mix)
477 // return false;
478 return false;
479}
480
481
482/**
483 * Flushes the page being accessed.
484 *
485 * @returns VBox status code suitable for scheduling.
486 * @param pVM The VM handle.
487 * @param pPool The pool.
488 * @param pPage The pool page (head).
489 * @param pCpu The disassembly of the write instruction.
490 * @param pRegFrame The trap register frame.
491 * @param GCPhysFault The fault address as guest physical address.
492 * @param pvFault The fault address.
493 */
494static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
495 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
496{
497 /*
498 * First, do the flushing.
499 */
500 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
501
502 /*
503 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
504 */
505 uint32_t cbWritten;
506 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
507 if (VBOX_SUCCESS(rc2))
508 pRegFrame->eip += pCpu->opsize;
509 else if (rc2 == VERR_EM_INTERPRETER)
510 {
511#ifdef IN_GC
512 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
513 {
514 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
515 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
516 rc = VINF_SUCCESS;
517 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
518 }
519 else
520#endif
521 {
522 rc = VINF_EM_RAW_EMULATE_INSTR;
523 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
524 }
525 }
526 else
527 rc = rc2;
528
529 /* See use in pgmPoolAccessHandlerSimple(). */
530 PGM_INVL_GUEST_TLBS();
531
532 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
533 return rc;
534
535}
536
537
538/**
539 * Handles the STOSD write accesses.
540 *
541 * @returns VBox status code suitable for scheduling.
542 * @param pVM The VM handle.
543 * @param pPool The pool.
544 * @param pPage The pool page (head).
545 * @param pCpu The disassembly of the write instruction.
546 * @param pRegFrame The trap register frame.
547 * @param GCPhysFault The fault address as guest physical address.
548 * @param pvFault The fault address.
549 */
550DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
551 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
552{
553 /*
554 * Increment the modification counter and insert it into the list
555 * of modified pages the first time.
556 */
557 if (!pPage->cModifications++)
558 pgmPoolMonitorModifiedInsert(pPool, pPage);
559
560 /*
561 * Execute REP STOSD.
562 *
563 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
564 * write situation, meaning that it's safe to write here.
565 */
566#ifdef IN_GC
567 uint32_t *pu32 = (uint32_t *)pvFault;
568#else
569 RTGCPTR pu32 = pvFault;
570#endif
571 while (pRegFrame->ecx)
572 {
573 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
574#ifdef IN_GC
575 *pu32++ = pRegFrame->eax;
576#else
577 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
578 pu32 += 4;
579#endif
580 GCPhysFault += 4;
581 pRegFrame->edi += 4;
582 pRegFrame->ecx--;
583 }
584 pRegFrame->eip += pCpu->opsize;
585
586 /* See use in pgmPoolAccessHandlerSimple(). */
587 PGM_INVL_GUEST_TLBS();
588
589 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
590 return VINF_SUCCESS;
591}
592
593
594/**
595 * Handles the simple write accesses.
596 *
597 * @returns VBox status code suitable for scheduling.
598 * @param pVM The VM handle.
599 * @param pPool The pool.
600 * @param pPage The pool page (head).
601 * @param pCpu The disassembly of the write instruction.
602 * @param pRegFrame The trap register frame.
603 * @param GCPhysFault The fault address as guest physical address.
604 * @param pvFault The fault address.
605 */
606DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
607 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
608{
609 /*
610 * Increment the modification counter and insert it into the list
611 * of modified pages the first time.
612 */
613 if (!pPage->cModifications++)
614 pgmPoolMonitorModifiedInsert(pPool, pPage);
615
616 /*
617 * Clear all the pages. ASSUMES that pvFault is readable.
618 */
619 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
620
621 /*
622 * Interpret the instruction.
623 */
624 uint32_t cb;
625 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
626 if (VBOX_SUCCESS(rc))
627 pRegFrame->eip += pCpu->opsize;
628 else if (rc == VERR_EM_INTERPRETER)
629 {
630# ifdef IN_GC
631 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
632 {
633 /* We're not able to handle this in ring-3, so fix the interpreter! */
634 /** @note Should be fine. There's no need to flush the whole thing. */
635#ifndef DEBUG_sandervl
636 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
637 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
638#endif
639 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
640 rc = pgmPoolMonitorChainFlush(pPool, pPage);
641 }
642 else
643# endif
644 {
645 rc = VINF_EM_RAW_EMULATE_INSTR;
646 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
647 }
648 }
649
650 /*
651 * Quick hack, with logging enabled we're getting stale
652 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
653 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
654 * have to be fixed to support this. But that'll have to wait till next week.
655 *
656 * An alternative is to keep track of the changed PTEs together with the
657 * GCPhys from the guest PT. This may prove expensive though.
658 *
659 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
660 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
661 */
662 PGM_INVL_GUEST_TLBS();
663
664 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
665 return rc;
666}
667
668
669/**
670 * \#PF Handler callback for PT write accesses.
671 *
672 * @returns VBox status code (appropriate for GC return).
673 * @param pVM VM Handle.
674 * @param uErrorCode CPU Error code.
675 * @param pRegFrame Trap register frame.
676 * NULL on DMA and other non CPU access.
677 * @param pvFault The fault address (cr2).
678 * @param GCPhysFault The GC physical address corresponding to pvFault.
679 * @param pvUser User argument.
680 */
681DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
682{
683 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
684 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
685 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
686 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
687
688 /*
689 * We should ALWAYS have the list head as the user parameter. This
690 * is because we use that page to record the changes.
691 */
692 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
693
694 /*
695 * Disassemble the faulting instruction.
696 */
697 DISCPUSTATE Cpu;
698 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
699 AssertRCReturn(rc, rc);
700
701 /*
702 * Check if it's worth dealing with.
703 */
704 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
705 || pPage->fCR3Mix)
706 && !pgmPoolMonitorIsReused(pPage, &Cpu, pvFault)
707 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
708 {
709 /*
710 * Simple instructions, no REP prefix.
711 */
712 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
713 {
714 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
715 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
716 return rc;
717 }
718
719 /* Get the current privilege level. */
720 uint32_t cpl = CPUMGetGuestCPL(pVM, pRegFrame);
721
722 /*
723 * Windows frequently does small memset() operations (netio test 4k+).
724 * We have to deal with these or we'll kill the cache and performance.
725 */
726
727 if ( Cpu.pCurInstr->opcode == OP_STOSWD
728 && cpl == 0
729 && pRegFrame->ecx <= 0x20
730 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
731 && !((uintptr_t)pvFault & 3)
732 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
733 && Cpu.mode == CPUMODE_32BIT
734 && Cpu.opmode == CPUMODE_32BIT
735 && Cpu.addrmode == CPUMODE_32BIT
736 && Cpu.prefix == PREFIX_REP
737 && !pRegFrame->eflags.Bits.u1DF
738 )
739 {
740 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
741 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
742 return rc;
743 }
744
745 /* REP prefix, don't bother. */
746 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
747 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
748 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
749 }
750
751 /*
752 * Not worth it, so flush it.
753 */
754 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
755 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
756 return rc;
757}
758
759# endif /* !IN_RING3 */
760#endif /* PGMPOOL_WITH_MONITORING */
761
762
763
764#ifdef PGMPOOL_WITH_CACHE
765/**
766 * Inserts a page into the GCPhys hash table.
767 *
768 * @param pPool The pool.
769 * @param pPage The page.
770 */
771DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
772{
773 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
774 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
775 pPage->iNext = pPool->aiHash[iHash];
776 pPool->aiHash[iHash] = pPage->idx;
777}
778
779
780/**
781 * Removes a page from the GCPhys hash table.
782 *
783 * @param pPool The pool.
784 * @param pPage The page.
785 */
786DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
787{
788 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
789 if (pPool->aiHash[iHash] == pPage->idx)
790 pPool->aiHash[iHash] = pPage->iNext;
791 else
792 {
793 uint16_t iPrev = pPool->aiHash[iHash];
794 for (;;)
795 {
796 const int16_t i = pPool->aPages[iPrev].iNext;
797 if (i == pPage->idx)
798 {
799 pPool->aPages[iPrev].iNext = pPage->iNext;
800 break;
801 }
802 if (i == NIL_PGMPOOL_IDX)
803 {
804 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
805 break;
806 }
807 iPrev = i;
808 }
809 }
810 pPage->iNext = NIL_PGMPOOL_IDX;
811}
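/*
 * Usage sketch (illustrative only; the wrapper below is hypothetical): the
 * two helpers above implement a simple separate-chaining hash keyed on
 * pPage->GCPhys, with the chain links stored in the pages themselves (iNext)
 * rather than in separate nodes.
 */
#if 0
static void pgmPoolHashExample(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys)
{
    pPage->GCPhys = GCPhys;          /* the key must be set before inserting. */
    pgmPoolHashInsert(pPool, pPage); /* pushed onto the head of its bucket chain. */
    pgmPoolHashRemove(pPool, pPage); /* unlinked again; pPage->iNext is reset. */
}
#endif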
812
813
814/**
815 * Frees up one cache page.
816 *
817 * @returns VBox status code.
818 * @retval VINF_SUCCESS on success.
819 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
820 * @param pPool The pool.
821 * @param iUser The user index.
822 */
823static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
824{
825 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
826 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
827
828 /*
829 * Select one page from the tail of the age list.
830 */
831 uint16_t iToFree = pPool->iAgeTail;
832 if (iToFree == iUser)
833 iToFree = pPool->aPages[iToFree].iAgePrev;
834/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
835 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
836 {
837 uint16_t i = pPool->aPages[iToFree].iAgePrev;
838 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
839 {
840 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
841 continue;
842 iToFree = i;
843 break;
844 }
845 }
846*/
847 Assert(iToFree != iUser);
848 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
849
850 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
851 if (rc == VINF_SUCCESS)
852 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
853 return rc;
854}
855
856
857/**
858 * Checks if a kind mismatch is really a page being reused
859 * or if it's just normal remappings.
860 *
861 * @returns true if reused and the cached page (enmKind1) should be flushed
862 * @returns false if not reused.
863 * @param enmKind1 The kind of the cached page.
864 * @param enmKind2 The kind of the requested page.
865 */
866static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
867{
868 switch (enmKind1)
869 {
870 /*
871 * It's perfectly fine to reuse these, except for PAE stuff.
872 */
873 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
874 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
875 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
877 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
878 switch (enmKind2)
879 {
880 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
881 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
882 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
883 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
884 return true;
885 default:
886 return false;
887 }
888
889 /*
890 * It's perfectly fine to reuse these, except for non-PAE stuff.
891 */
892 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
893 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
894 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
895 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
896 switch (enmKind2)
897 {
898 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
899 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
900 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
901 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
902 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
903 return true;
904 default:
905 return false;
906 }
907
908 /*
909 * These cannot be flushed, and it's common to reuse the PDs as PTs.
910 */
911 case PGMPOOLKIND_ROOT_32BIT_PD:
912 case PGMPOOLKIND_ROOT_PAE_PD:
913 case PGMPOOLKIND_ROOT_PDPTR:
914 case PGMPOOLKIND_ROOT_PML4:
915 return false;
916
917 default:
918 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
919 }
920}
921
922
923/**
924 * Attempts to satisfy a pgmPoolAlloc request from the cache.
925 *
926 * @returns VBox status code.
927 * @retval VINF_PGM_CACHED_PAGE on success.
928 * @retval VERR_FILE_NOT_FOUND if not found.
929 * @param pPool The pool.
930 * @param GCPhys The GC physical address of the page we're gonna shadow.
931 * @param enmKind The kind of mapping.
932 * @param iUser The shadow page pool index of the user table.
933 * @param iUserTable The index into the user table (shadowed).
934 * @param ppPage Where to store the pointer to the page.
935 */
936static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
937{
938 /*
939 * Look up the GCPhys in the hash.
940 */
941 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
942 if (i != NIL_PGMPOOL_IDX)
943 {
944 do
945 {
946 PPGMPOOLPAGE pPage = &pPool->aPages[i];
947 if (pPage->GCPhys == GCPhys)
948 {
949 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
950 {
951 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
952 if (VBOX_SUCCESS(rc))
953 {
954 *ppPage = pPage;
955 STAM_COUNTER_INC(&pPool->StatCacheHits);
956 return VINF_PGM_CACHED_PAGE;
957 }
958 return rc;
959 }
960
961 /*
962 * The kind is different. In some cases we should now flush the page
963 * as it has been reused, but in most cases this is normal remapping
964 * of PDs as PTs or big pages using the GCPhys field in a slightly
965 * different way than the other kinds.
966 */
967 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
968 {
969 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
970 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTrackInsert. */
971 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
972 break;
973 }
974 }
975
976 /* next */
977 i = pPage->iNext;
978 } while (i != NIL_PGMPOOL_IDX);
979 }
980
981 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
982 STAM_COUNTER_INC(&pPool->StatCacheMisses);
983 return VERR_FILE_NOT_FOUND;
984}
985
986
987/**
988 * Inserts a page into the cache.
989 *
990 * @param pPool The pool.
991 * @param pPage The cached page.
992 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
993 */
994static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
995{
996 /*
997 * Insert into the GCPhys hash if the page is fit for that.
998 */
999 Assert(!pPage->fCached);
1000 if (fCanBeCached)
1001 {
1002 pPage->fCached = true;
1003 pgmPoolHashInsert(pPool, pPage);
1004 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1005 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1006 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1007 }
1008 else
1009 {
1010 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1011 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1012 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1013 }
1014
1015 /*
1016 * Insert at the head of the age list.
1017 */
1018 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1019 pPage->iAgeNext = pPool->iAgeHead;
1020 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1021 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1022 else
1023 pPool->iAgeTail = pPage->idx;
1024 pPool->iAgeHead = pPage->idx;
1025}
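/*
 * Design note: the age list is a plain LRU - insertion here and cache hits
 * (pgmPoolCacheUsed) put pages at iAgeHead, while eviction in
 * pgmPoolCacheFreeOne() starts from iAgeTail. A small invariant check,
 * illustrative only (the helper name is hypothetical):
 */
#if 0
static void pgmPoolCacheAssertHead(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Assert(pPool->iAgeHead == pPage->idx);
    Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
    if (pPage->iAgeNext == NIL_PGMPOOL_IDX)
        Assert(pPool->iAgeTail == pPage->idx);
}
#endif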
1026
1027
1028/**
1029 * Flushes a cached page.
1030 *
1031 * @param pPool The pool.
1032 * @param pPage The cached page.
1033 */
1034static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1035{
1036 /*
1037 * Remove the page from the hash.
1038 */
1039 if (pPage->fCached)
1040 {
1041 pPage->fCached = false;
1042 pgmPoolHashRemove(pPool, pPage);
1043 }
1044 else
1045 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1046
1047 /*
1048 * Remove it from the age list.
1049 */
1050 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1051 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1052 else
1053 pPool->iAgeTail = pPage->iAgePrev;
1054 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1055 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1056 else
1057 pPool->iAgeHead = pPage->iAgeNext;
1058 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1059 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1060}
1061#endif /* PGMPOOL_WITH_CACHE */
1062
1063
1064#ifdef PGMPOOL_WITH_MONITORING
1065/**
1066 * Looks for pages sharing the monitor.
1067 *
1068 * @returns Pointer to the head page.
1069 * @returns NULL if not found.
1070 * @param pPool The Pool
1071 * @param pNewPage The page which is going to be monitored.
1072 */
1073static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1074{
1075#ifdef PGMPOOL_WITH_CACHE
1076 /*
1077 * Look up the GCPhys in the hash.
1078 */
1079 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1080 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1081 if (i == NIL_PGMPOOL_IDX)
1082 return NULL;
1083 do
1084 {
1085 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1086 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1087 && pPage != pNewPage)
1088 {
1089 switch (pPage->enmKind)
1090 {
1091 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1092 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1093 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1094 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1095 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1096 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1097 case PGMPOOLKIND_ROOT_32BIT_PD:
1098 case PGMPOOLKIND_ROOT_PAE_PD:
1099 case PGMPOOLKIND_ROOT_PDPTR:
1100 case PGMPOOLKIND_ROOT_PML4:
1101 {
1102 /* find the head */
1103 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1104 {
1105 Assert(pPage->iMonitoredPrev != pPage->idx);
1106 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1107 }
1108 return pPage;
1109 }
1110
1111 /* ignore, no monitoring. */
1112 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1113 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1114 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1115 break;
1116 default:
1117 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1118 }
1119 }
1120
1121 /* next */
1122 i = pPage->iNext;
1123 } while (i != NIL_PGMPOOL_IDX);
1124#endif
1125 return NULL;
1126}
1127
1128/**
1129 * Enables write monitoring of a guest page.
1130 *
1131 * @returns VBox status code.
1132 * @retval VINF_SUCCESS on success.
1133 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1134 * @param pPool The pool.
1135 * @param pPage The cached page.
1136 */
1137static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1138{
1139 /*
1140 * Filter out the relevant kinds.
1141 */
1142 switch (pPage->enmKind)
1143 {
1144 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1145 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1146 break;
1147
1148 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1149 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1150 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1151 /* Nothing to monitor here. */
1152 return VINF_SUCCESS;
1153
1154 case PGMPOOLKIND_ROOT_32BIT_PD:
1155 case PGMPOOLKIND_ROOT_PAE_PD:
1156#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1157 break;
1158#endif
1159 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1160 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1161 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1162 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1163 case PGMPOOLKIND_ROOT_PDPTR:
1164 case PGMPOOLKIND_ROOT_PML4:
1165 default:
1166 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1167 }
1168
1169 /*
1170 * Install handler.
1171 */
1172 int rc;
1173 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1174 if (pPageHead)
1175 {
1176 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1177 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1178 pPage->iMonitoredPrev = pPageHead->idx;
1179 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1180 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1181 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1182 pPageHead->iMonitoredNext = pPage->idx;
1183 rc = VINF_SUCCESS;
1184 }
1185 else
1186 {
1187 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1188 PVM pVM = pPool->CTXSUFF(pVM);
1189 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1190 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1191 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1192 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
1193 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
1194 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
1195 pPool->pszAccessHandler);
1196 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1197 * the heap size should suffice. */
1198 AssertFatalRC(rc);
1199 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1200 rc = VERR_PGM_POOL_CLEARED;
1201 }
1202 pPage->fMonitored = true;
1203 return rc;
1204}
1205
1206
1207/**
1208 * Disables write monitoring of a guest page.
1209 *
1210 * @returns VBox status code.
1211 * @retval VINF_SUCCESS on success.
1212 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1213 * @param pPool The pool.
1214 * @param pPage The cached page.
1215 */
1216static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1217{
1218 /*
1219 * Filter out the relevant kinds.
1220 */
1221 switch (pPage->enmKind)
1222 {
1223 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1224 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1225 break;
1226
1227 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1228 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1229 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1230 /* Nothing to monitor here. */
1231 return VINF_SUCCESS;
1232
1233 case PGMPOOLKIND_ROOT_32BIT_PD:
1234 case PGMPOOLKIND_ROOT_PAE_PD:
1235#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1236 break;
1237#endif
1238 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1239 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1240 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1241 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1242 case PGMPOOLKIND_ROOT_PDPTR:
1243 case PGMPOOLKIND_ROOT_PML4:
1244 default:
1245 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1246 }
1247
1248 /*
1249 * Remove the page from the monitored list or uninstall it if last.
1250 */
1251 const PVM pVM = pPool->CTXSUFF(pVM);
1252 int rc;
1253 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1254 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1255 {
1256 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1257 {
1258 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1259 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1260 pNewHead->fCR3Mix = pPage->fCR3Mix;
1261 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1262 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1263 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pNewHead),
1264 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pNewHead),
1265 pPool->pszAccessHandler);
1266 AssertFatalRCSuccess(rc);
1267 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1268 }
1269 else
1270 {
1271 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1272 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1273 {
1274 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1275 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1276 }
1277 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1278 rc = VINF_SUCCESS;
1279 }
1280 }
1281 else
1282 {
1283 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1284 AssertFatalRC(rc);
1285 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1286 rc = VERR_PGM_POOL_CLEARED;
1287 }
1288 pPage->fMonitored = false;
1289
1290 /*
1291 * Remove it from the list of modified pages (if in it).
1292 */
1293 pgmPoolMonitorModifiedRemove(pPool, pPage);
1294
1295 return rc;
1296}
1297
1298
1299#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1300/**
1301 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1302 *
1303 * @param pPool The Pool.
1304 * @param pPage A page in the chain.
1305 * @param fCR3Mix The new fCR3Mix value.
1306 */
1307static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1308{
1309 /* current */
1310 pPage->fCR3Mix = fCR3Mix;
1311
1312 /* before */
1313 int16_t idx = pPage->iMonitoredPrev;
1314 while (idx != NIL_PGMPOOL_IDX)
1315 {
1316 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1317 idx = pPool->aPages[idx].iMonitoredPrev;
1318 }
1319
1320 /* after */
1321 idx = pPage->iMonitoredNext;
1322 while (idx != NIL_PGMPOOL_IDX)
1323 {
1324 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1325 idx = pPool->aPages[idx].iMonitoredNext;
1326 }
1327}
1328
1329
1330/**
1331 * Installs or modifies monitoring of a CR3 page (special).
1332 *
1333 * We're pretending the CR3 page is shadowed by the pool so we can use the
1334 * generic mechanisms for detecting chained monitoring. (This also gives us a
1335 * taste of what code changes are required to really pool CR3 shadow pages.)
1336 *
1337 * @returns VBox status code.
1338 * @param pPool The pool.
1339 * @param idxRoot The CR3 (root) page index.
1340 * @param GCPhysCR3 The (new) CR3 value.
1341 */
1342int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1343{
1344 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1345 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1346 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1347 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1348
1349 /*
1350 * The unlikely case where it already matches.
1351 */
1352 if (pPage->GCPhys == GCPhysCR3)
1353 {
1354 Assert(pPage->fMonitored);
1355 return VINF_SUCCESS;
1356 }
1357
1358 /*
1359 * Flush the current monitoring and remove it from the hash.
1360 */
1361 int rc = VINF_SUCCESS;
1362 if (pPage->fMonitored)
1363 {
1364 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1365 rc = pgmPoolMonitorFlush(pPool, pPage);
1366 if (rc == VERR_PGM_POOL_CLEARED)
1367 rc = VINF_SUCCESS;
1368 else
1369 AssertFatalRC(rc);
1370 pgmPoolHashRemove(pPool, pPage);
1371 }
1372
1373 /*
1374 * Monitor the page at the new location and insert it into the hash.
1375 */
1376 pPage->GCPhys = GCPhysCR3;
1377 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1378 if (rc2 != VERR_PGM_POOL_CLEARED)
1379 {
1380 AssertFatalRC(rc2);
1381 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1382 rc = rc2;
1383 }
1384 pgmPoolHashInsert(pPool, pPage);
1385 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1386 return rc;
1387}
1388
1389
1390/**
1391 * Removes the monitoring of a CR3 page (special).
1392 *
1393 * @returns VBox status code.
1394 * @param pPool The pool.
1395 * @param idxRoot The CR3 (root) page index.
1396 */
1397int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1398{
1399 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1400 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1401 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1402 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1403
1404 if (!pPage->fMonitored)
1405 return VINF_SUCCESS;
1406
1407 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1408 int rc = pgmPoolMonitorFlush(pPool, pPage);
1409 if (rc != VERR_PGM_POOL_CLEARED)
1410 AssertFatalRC(rc);
1411 else
1412 rc = VINF_SUCCESS;
1413 pgmPoolHashRemove(pPool, pPage);
1414 Assert(!pPage->fMonitored);
1415 pPage->GCPhys = NIL_RTGCPHYS;
1416 return rc;
1417}
1418#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1419
1420
1421/**
1422 * Inserts the page into the list of modified pages.
1423 *
1424 * @param pPool The pool.
1425 * @param pPage The page.
1426 */
1427void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1428{
1429 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1430 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1431 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1432 && pPool->iModifiedHead != pPage->idx,
1433 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1434 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1435 pPool->iModifiedHead, pPool->cModifiedPages));
1436
1437 pPage->iModifiedNext = pPool->iModifiedHead;
1438 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1439 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1440 pPool->iModifiedHead = pPage->idx;
1441 pPool->cModifiedPages++;
1442#ifdef VBOX_WITH_STATISTICS
1443 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1444 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1445#endif
1446}
1447
1448
1449/**
1450 * Removes the page from the list of modified pages and resets the
1451 * modification counter.
1452 *
1453 * @param pPool The pool.
1454 * @param pPage The page which is believed to be in the list of modified pages.
1455 */
1456static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1457{
1458 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1459 if (pPool->iModifiedHead == pPage->idx)
1460 {
1461 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1462 pPool->iModifiedHead = pPage->iModifiedNext;
1463 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1464 {
1465 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1466 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1467 }
1468 pPool->cModifiedPages--;
1469 }
1470 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1471 {
1472 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1473 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1474 {
1475 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1476 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1477 }
1478 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1479 pPool->cModifiedPages--;
1480 }
1481 else
1482 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1483 pPage->cModifications = 0;
1484}
1485
1486
1487/**
1488 * Zaps the list of modified pages, resetting their modification counters in the process.
1489 *
1490 * @param pVM The VM handle.
1491 */
1492void pgmPoolMonitorModifiedClearAll(PVM pVM)
1493{
1494 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1495 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1496
1497 unsigned cPages = 0; NOREF(cPages);
1498 uint16_t idx = pPool->iModifiedHead;
1499 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1500 while (idx != NIL_PGMPOOL_IDX)
1501 {
1502 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1503 idx = pPage->iModifiedNext;
1504 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1505 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1506 pPage->cModifications = 0;
1507 Assert(++cPages);
1508 }
1509 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1510 pPool->cModifiedPages = 0;
1511}
1512
1513
1514/**
1515 * Clear all shadow pages and clear all modification counters.
1516 *
1517 * @param pVM The VM handle.
1518 * @remark Should only be used when monitoring is available, thus placed in
1519 * the PGMPOOL_WITH_MONITORING #ifdef.
1520 */
1521void pgmPoolClearAll(PVM pVM)
1522{
1523 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1524 STAM_PROFILE_START(&pPool->StatClearAll, c);
1525 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1526
1527 /*
1528 * Iterate all the pages until we've encountered all that are in use.
1529 * This is a simple but not quite optimal solution.
1530 */
1531 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1532 unsigned cLeft = pPool->cUsedPages;
1533 unsigned iPage = pPool->cCurPages;
1534 while (--iPage >= PGMPOOL_IDX_FIRST)
1535 {
1536 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1537 if (pPage->GCPhys != NIL_RTGCPHYS)
1538 {
1539 switch (pPage->enmKind)
1540 {
1541 /*
1542 * We only care about shadow page tables.
1543 */
1544 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1545 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1546 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1547 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1548 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1549 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1550 {
1551#ifdef PGMPOOL_WITH_USER_TRACKING
1552 if (pPage->cPresent)
1553#endif
1554 {
1555 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1556 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1557 ASMMemZeroPage(pvShw);
1558 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1559#ifdef PGMPOOL_WITH_USER_TRACKING
1560 pPage->cPresent = 0;
1561 pPage->iFirstPresent = ~0;
1562#endif
1563 }
1564 }
1565 /* fall thru */
1566
1567 default:
1568 Assert(!pPage->cModifications || ++cModifiedPages);
1569 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1570 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1571 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1572 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1573 pPage->cModifications = 0;
1574 break;
1575
1576 }
1577 if (!--cLeft)
1578 break;
1579 }
1580 }
1581
1582 /* sweep the special pages too. */
1583 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1584 {
1585 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1586 if (pPage->GCPhys != NIL_RTGCPHYS)
1587 {
1588 Assert(!pPage->cModifications || ++cModifiedPages);
1589 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1590 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1591 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1592 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1593 pPage->cModifications = 0;
1594 }
1595 }
1596
1597 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1598 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1599 pPool->cModifiedPages = 0;
1600
1601#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1602 /*
1603 * Clear all the GCPhys links and rebuild the phys ext free list.
1604 */
1605 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1606 pRam;
1607 pRam = pRam->CTXSUFF(pNext))
1608 {
1609 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1610 while (iPage-- > 0)
1611 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
1612 }
1613
1614 pPool->iPhysExtFreeHead = 0;
1615 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1616 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1617 for (unsigned i = 0; i < cMaxPhysExts; i++)
1618 {
1619 paPhysExts[i].iNext = i + 1;
1620 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1621 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1622 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1623 }
1624 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1625#endif
1626
1627
1628 pPool->cPresent = 0;
1629 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1630}
1631#endif /* PGMPOOL_WITH_MONITORING */
1632
1633
1634#ifdef PGMPOOL_WITH_USER_TRACKING
1635/**
1636 * Frees up at least one user entry.
1637 *
1638 * @returns VBox status code.
1639 * @retval VINF_SUCCESS if successfully added.
1640 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1641 * @param pPool The pool.
1642 * @param iUser The user index.
1643 */
1644static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1645{
1646 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1647#ifdef PGMPOOL_WITH_CACHE
1648 /*
1649 * Just free cached pages in a braindead fashion.
1650 */
1651 /** @todo walk the age list backwards and free the first with usage. */
1652 int rc = VINF_SUCCESS;
1653 do
1654 {
1655 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1656 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1657 rc = rc2;
1658 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1659 return rc;
1660#else
1661 /*
1662 * Lazy approach.
1663 */
1664 pgmPoolFlushAllInt(pPool);
1665 return VERR_PGM_POOL_FLUSHED;
1666#endif
1667}
1668
1669
1670/**
1671 * Inserts a page into the cache.
1672 *
1673 * This will create a user node for the page, insert it into the GCPhys
1674 * hash, and insert it into the age list.
1675 *
1676 * @returns VBox status code.
1677 * @retval VINF_SUCCESS if successfully added.
1678 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1679 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1680 * @param pPool The pool.
1681 * @param pPage The cached page.
1682 * @param GCPhys The GC physical address of the page we're gonna shadow.
1683 * @param iUser The user index.
1684 * @param iUserTable The user table index.
1685 */
1686DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1687{
1688 int rc = VINF_SUCCESS;
1689 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1690
1691 /*
1692 * Find a free user node.
1693 */
1694 uint16_t i = pPool->iUserFreeHead;
1695 if (i == NIL_PGMPOOL_USER_INDEX)
1696 {
1697 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1698 if (VBOX_FAILURE(rc))
1699 return rc;
1700 i = pPool->iUserFreeHead;
1701 }
1702
1703 /*
1704 * Unlink the user node from the free list,
1705 * initialize and insert it into the user list.
1706 */
1707 pPool->iUserFreeHead = pUser[i].iNext;
1708 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1709 pUser[i].iUser = iUser;
1710 pUser[i].iUserTable = iUserTable;
1711 pPage->iUserHead = i;
1712
1713 /*
1714 * Insert into cache and enable monitoring of the guest page if enabled.
1715 *
1716 * Until we implement caching of all levels, including the CR3 one, we'll
1717 * have to make sure we don't try to monitor & cache any recursive reuse of
1718 * a monitored CR3 page. Because all Windows versions are doing this we'll
1719 * have to be able to do combined access monitoring, CR3 + PT and
1720 * PD + PT (guest PAE).
1721 *
1722 * Update:
1723 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
1724 */
1725#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1726# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1727 const bool fCanBeMonitored = true;
1728# else
1729 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1730 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1731 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1732# endif
1733# ifdef PGMPOOL_WITH_CACHE
1734 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1735# endif
1736 if (fCanBeMonitored)
1737 {
1738# ifdef PGMPOOL_WITH_MONITORING
1739 rc = pgmPoolMonitorInsert(pPool, pPage);
1740 if (rc == VERR_PGM_POOL_CLEARED)
1741 {
1742 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1743# ifndef PGMPOOL_WITH_CACHE
1744 pgmPoolMonitorFlush(pPool, pPage);
1745 rc = VERR_PGM_POOL_FLUSHED;
1746# endif
1747 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1748 pUser[i].iNext = pPool->iUserFreeHead;
1749 pUser[i].iUser = NIL_PGMPOOL_IDX;
1750 pPool->iUserFreeHead = i;
1751 }
1752 }
1753# endif
1754#endif /* PGMPOOL_WITH_MONITORING */
1755 return rc;
1756}
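/*
 * Illustrative sketch (not part of the build; the helper name is made up):
 * walking the user chain of a page. Each user record names the shadow table
 * (iUser) and the entry within it (iUserTable) that references this page,
 * so the page can later be unlinked from all its parents without scanning
 * every shadow table.
 */
#if 0
static void pgmPoolTrackDumpUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
    for (uint16_t i = pPage->iUserHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
        Log(("user %#x: iUser=%#x iUserTable=%#x\n", i, paUsers[i].iUser, paUsers[i].iUserTable));
}
#endif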
1757
1758
1759# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1760/**
1761 * Adds a user reference to a page.
1762 *
1763 * This will
1764 * This will add a user record for the page and tell the cache to update
1765 * its replacement stats (moving the page to the head of the age list).
1766 * @returns VBox status code.
1767 * @retval VINF_SUCCESS if successfully added.
1768 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1769 * @param pPool The pool.
1770 * @param pPage The cached page.
1771 * @param iUser The user index.
1772 * @param iUserTable The index into the user table.
1773 */
1774static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1775{
1776 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1777
1778# ifdef VBOX_STRICT
1779 /*
1780     * Check that the entry doesn't already exist.
1781 */
1782 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1783 {
1784 uint16_t i = pPage->iUserHead;
1785 do
1786 {
1787 Assert(i < pPool->cMaxUsers);
1788 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1789 i = paUsers[i].iNext;
1790 } while (i != NIL_PGMPOOL_USER_INDEX);
1791 }
1792# endif
1793
1794 /*
1795 * Allocate a user node.
1796 */
1797 uint16_t i = pPool->iUserFreeHead;
1798 if (i == NIL_PGMPOOL_USER_INDEX)
1799 {
1800 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1801 if (VBOX_FAILURE(rc))
1802 return rc;
1803 i = pPool->iUserFreeHead;
1804 }
1805 pPool->iUserFreeHead = paUsers[i].iNext;
1806
1807 /*
1808 * Initialize the user node and insert it.
1809 */
1810 paUsers[i].iNext = pPage->iUserHead;
1811 paUsers[i].iUser = iUser;
1812 paUsers[i].iUserTable = iUserTable;
1813 pPage->iUserHead = i;
1814
1815# ifdef PGMPOOL_WITH_CACHE
1816 /*
1817 * Tell the cache to update its replacement stats for this page.
1818 */
1819 pgmPoolCacheUsed(pPool, pPage);
1820# endif
1821 return VINF_SUCCESS;
1822}
1823# endif /* PGMPOOL_WITH_CACHE */
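
/*
 * Editor's sketch (hypothetical, not part of the original source): walking a
 * page's user chain. This is the same traversal the VBOX_STRICT duplicate
 * check in pgmPoolTrackAddUser() performs; it visits every (iUser, iUserTable)
 * pair that references pPage.
 */
#if 0
    PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
    for (uint16_t i = pPage->iUserHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
        Log(("user: iUser=%#x iUserTable=%#x\n", paUsers[i].iUser, paUsers[i].iUserTable));
#endif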
1824
1825
1826/**
1827 * Frees a user record associated with a page.
1828 *
1829 * This does not clear the entry in the user table, it simply returns the
1830 * user record to the chain of free records.
1831 *
1832 * @param pPool The pool.
1833 * @param pPage The shadow page.
1834 * @param iUser The shadow page pool index of the user table.
1835 * @param iUserTable The index into the user table (shadowed).
1836 */
1837static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1838{
1839 /*
1840 * Unlink and free the specified user entry.
1841 */
1842 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1843
1844    /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
1845 uint16_t i = pPage->iUserHead;
1846 if ( i != NIL_PGMPOOL_USER_INDEX
1847 && paUsers[i].iUser == iUser
1848 && paUsers[i].iUserTable == iUserTable)
1849 {
1850 pPage->iUserHead = paUsers[i].iNext;
1851
1852 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1853 paUsers[i].iNext = pPool->iUserFreeHead;
1854 pPool->iUserFreeHead = i;
1855 return;
1856 }
1857
1858 /* General: Linear search. */
1859 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1860 while (i != NIL_PGMPOOL_USER_INDEX)
1861 {
1862 if ( paUsers[i].iUser == iUser
1863 && paUsers[i].iUserTable == iUserTable)
1864 {
1865 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1866 paUsers[iPrev].iNext = paUsers[i].iNext;
1867 else
1868 pPage->iUserHead = paUsers[i].iNext;
1869
1870 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1871 paUsers[i].iNext = pPool->iUserFreeHead;
1872 pPool->iUserFreeHead = i;
1873 return;
1874 }
1875 iPrev = i;
1876 i = paUsers[i].iNext;
1877 }
1878
1879 /* Fatal: didn't find it */
1880 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1881 iUser, iUserTable, pPage->GCPhys));
1882}
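
/*
 * Editor's sketch (hypothetical, not part of the original source): the
 * free-chain invariant pgmPoolTrackFreeUser() maintains - every record on
 * the free chain carries iUser == NIL_PGMPOOL_IDX, which is what the strict
 * checks elsewhere in this file rely on.
 */
#if 0
    PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
    for (uint16_t i = pPool->iUserFreeHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
        Assert(paUsers[i].iUser == NIL_PGMPOOL_IDX);
#endif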
1883
1884
1885/**
1886 * Gets the entry size of a shadow table.
1887 *
1888 * @param enmKind
1889 * The kind of page.
1890 *
1891 * @returns The size of the entry in bytes. That is, 4 or 8.
1892 * @returns If the kind is not for a table, a fatal assertion is raised
1893 *          and the function does not return.
1894 */
1895DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1896{
1897 switch (enmKind)
1898 {
1899 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1900 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1901 case PGMPOOLKIND_ROOT_32BIT_PD:
1902 return 4;
1903
1904 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1905 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1906 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1907 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1908 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1909 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1910 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1911 case PGMPOOLKIND_ROOT_PAE_PD:
1912 case PGMPOOLKIND_ROOT_PDPTR:
1913 case PGMPOOLKIND_ROOT_PML4:
1914 return 8;
1915
1916 default:
1917 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1918 }
1919}
1920
1921
1922/**
1923 * Gets the entry size of a guest table.
1924 *
1925 * @param enmKind
1926 * The kind of page.
1927 *
1928 * @returns The size of the entry in bytes. That is, 4 or 8.
1929 * @returns If the kind is not for a table, a fatal assertion is raised
1930 *          and the function does not return.
1931 */
1932DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1933{
1934 switch (enmKind)
1935 {
1936 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1937 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1938 case PGMPOOLKIND_ROOT_32BIT_PD:
1939 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1940 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1941 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1942 return 4;
1943
1944 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1945 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1946 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1947 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1948 case PGMPOOLKIND_ROOT_PAE_PD:
1949 case PGMPOOLKIND_ROOT_PDPTR:
1950 case PGMPOOLKIND_ROOT_PML4:
1951 return 8;
1952
1953 default:
1954 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1955 }
1956}
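
/*
 * Editor's sketch (hypothetical, not part of the original source): the
 * typical use of the two entry-size helpers above - converting the offset of
 * a guest write into the matching shadow table entry. GCPhysFault and
 * PAGE_OFFSET_MASK are assumed to be available in the caller's context.
 */
#if 0
    const unsigned cbGstEntry = pgmPoolTrackGetGuestEntrySize((PGMPOOLKIND)pPage->enmKind);
    const unsigned cbShwEntry = pgmPoolTrackGetShadowEntrySize((PGMPOOLKIND)pPage->enmKind);
    const unsigned iEntry     = (GCPhysFault & PAGE_OFFSET_MASK) / cbGstEntry;
    const unsigned offShw     = iEntry * cbShwEntry; /* byte offset into the shadow table. */
    NOREF(offShw);
#endif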
1957
1958
1959#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1960/**
1961 * Scans one shadow page table for mappings of a physical page.
1962 *
1963 * @param pVM The VM handle.
1964 * @param pHCPhys The aHCPhys ramrange entry in question.
1965 * @param iShw The shadow page table.
1966 * @param cRefs The number of references made in that PT.
1967 */
1968static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
1969{
1970    LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
1971 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1972
1973 /*
1974 * Assert sanity.
1975 */
1976 Assert(cRefs == 1);
1977 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
1978 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
1979
1980 /*
1981 * Then, clear the actual mappings to the page in the shadow PT.
1982 */
1983 switch (pPage->enmKind)
1984 {
1985 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1986 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1987 {
1988 const uint32_t u32 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
1989 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
1990 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
1991 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
1992 {
1993 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
1994 pPT->a[i].u = 0;
1995 cRefs--;
1996 if (!cRefs)
1997 return;
1998 }
1999#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2000 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2001 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2002 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2003 {
2004 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2005 pPT->a[i].u = 0;
2006 }
2007#endif
2008 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2009 break;
2010 }
2011
2012 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2013 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2014 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2015 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2016 {
2017 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2018 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2019 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2020 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2021 {
2022 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2023 pPT->a[i].u = 0;
2024 cRefs--;
2025 if (!cRefs)
2026 return;
2027 }
2028#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2029 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2030 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2031 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2032 {
2033 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2034 pPT->a[i].u = 0;
2035 }
2036#endif
2037 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2038 break;
2039 }
2040
2041 default:
2042 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2043 }
2044}
2045
2046
2047/**
2048 * Scans one shadow page table for mappings of a physical page.
2049 *
2050 * @param pVM The VM handle.
2051 * @param pHCPhys The aHCPhys ramrange entry in question.
2052 * @param iShw The shadow page table.
2053 * @param cRefs The number of references made in that PT.
2054 */
2055void pgmPoolTrackFlushGCPhysPT(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iShw, uint16_t cRefs)
2056{
2057 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2058 LogFlow(("pgmPoolTrackFlushGCPhysPT: pHCPhys=%p:{%RHp} iShw=%d cRefs=%d\n", pHCPhys, *pHCPhys, iShw, cRefs));
2059 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2060 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, iShw, cRefs);
2061 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2062 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2063}
2064
2065
2066/**
2067 * Flushes a list of shadow page tables mapping the same physical page.
2068 *
2069 * @param pVM The VM handle.
2070 * @param pHCPhys The aHCPhys ramrange entry in question.
2071 * @param iPhysExt The physical cross reference extent list to flush.
2072 */
2073void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PRTHCPHYS pHCPhys, uint16_t iPhysExt)
2074{
2075 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2076 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2077    LogFlow(("pgmPoolTrackFlushGCPhysPTs: pHCPhys=%p:{%RHp} iPhysExt=%d\n", pHCPhys, *pHCPhys, iPhysExt));
2078
2079 const uint16_t iPhysExtStart = iPhysExt;
2080 PPGMPOOLPHYSEXT pPhysExt;
2081 do
2082 {
2083 Assert(iPhysExt < pPool->cMaxPhysExts);
2084 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2085 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2086 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2087 {
2088 pgmPoolTrackFlushGCPhysPTInt(pVM, pHCPhys, pPhysExt->aidx[i], 1);
2089 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2090 }
2091
2092 /* next */
2093 iPhysExt = pPhysExt->iNext;
2094 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2095
2096 /* insert the list into the free list and clear the ram range entry. */
2097 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2098 pPool->iPhysExtFreeHead = iPhysExtStart;
2099 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2100
2101 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2102}
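
/*
 * Editor's sketch (hypothetical, not part of the original source): decoding
 * the tracking bits of an aHCPhys entry to pick between the flush routines
 * above. The field layout is inferred from pgmPoolTrackPhysExtDerefGCPhys()
 * further down; pVM and pHCPhys are assumed to be in scope.
 */
#if 0
    const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
    const uint16_t iIdx  = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
    if (cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT)
    {
        if (iIdx != MM_RAM_FLAGS_IDX_OVERFLOWED)
            pgmPoolTrackFlushGCPhysPTs(pVM, pHCPhys, iIdx);    /* walk the extent list. */
        else
            pgmPoolTrackFlushGCPhysPTsSlow(pVM, pHCPhys);      /* too many refs: scan everything. */
    }
    else if (cRefs)
        pgmPoolTrackFlushGCPhysPT(pVM, pHCPhys, iIdx, cRefs);  /* single shadow PT. */
#endif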
2103#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2104
2105
2106/**
2107 * Scans all shadow page tables for mappings of a physical page.
2108 *
2109 * This may be slow, but it's most likely more efficient than cleaning
2110 * out the entire page pool / cache.
2111 *
2112 * @returns VBox status code.
2113 * @retval VINF_SUCCESS if all references have been successfully cleared.
2114 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2115 * a page pool cleaning.
2116 *
2117 * @param pVM The VM handle.
2118 * @param pHCPhys The aHCPhys ramrange entry in question.
2119 */
2120int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PRTHCPHYS pHCPhys)
2121{
2122 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2123 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2124 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d *pHCPhys=%RHp\n",
2125 pPool->cUsedPages, pPool->cPresent, *pHCPhys));
2126
2127#if 1
2128 /*
2129 * There is a limit to what makes sense.
2130 */
2131 if (pPool->cPresent > 1024)
2132 {
2133 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2134 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2135 return VINF_PGM_GCPHYS_ALIASED;
2136 }
2137#endif
2138
2139 /*
2140     * Iterate all the pages until we've encountered all those in use.
2141     * This is a simple but not quite optimal solution.
2142 */
2143 const uint64_t u64 = (*pHCPhys & X86_PTE_PAE_PG_MASK) | X86_PTE_P;
2144 const uint32_t u32 = u64;
2145 unsigned cLeft = pPool->cUsedPages;
2146 unsigned iPage = pPool->cCurPages;
2147 while (--iPage >= PGMPOOL_IDX_FIRST)
2148 {
2149 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2150 if (pPage->GCPhys != NIL_RTGCPHYS)
2151 {
2152 switch (pPage->enmKind)
2153 {
2154 /*
2155 * We only care about shadow page tables.
2156 */
2157 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2158 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2159 {
2160 unsigned cPresent = pPage->cPresent;
2161 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2162 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2163 if (pPT->a[i].n.u1Present)
2164 {
2165 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2166 {
2167 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2168 pPT->a[i].u = 0;
2169 }
2170 if (!--cPresent)
2171 break;
2172 }
2173 break;
2174 }
2175
2176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2177 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2178 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2179 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2180 {
2181 unsigned cPresent = pPage->cPresent;
2182 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2183 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2184 if (pPT->a[i].n.u1Present)
2185 {
2186 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2187 {
2188 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2189 pPT->a[i].u = 0;
2190 }
2191 if (!--cPresent)
2192 break;
2193 }
2194 break;
2195 }
2196 }
2197 if (!--cLeft)
2198 break;
2199 }
2200 }
2201
2202 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2203 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2204 return VINF_SUCCESS;
2205}
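
/*
 * Editor's sketch (hypothetical, not part of the original source): reacting
 * to VINF_PGM_GCPHYS_ALIASED from the slow scan above - the caller is
 * expected to fall back to a full CR3 sync instead of scanning further.
 */
#if 0
    int rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pHCPhys);
    if (rc == VINF_PGM_GCPHYS_ALIASED)
        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /* let the next CR3 sync clean up. */
#endif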
2206
2207
2208/**
2209 * Clears the user entry in a user table.
2210 *
2211 * This is used to remove all references to a page when flushing it.
2212 */
2213static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2214{
2215 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2216 Assert(pUser->iUser < pPool->cCurPages);
2217
2218 /*
2219 * Map the user page.
2220 */
2221 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2222 union
2223 {
2224 uint64_t *pau64;
2225 uint32_t *pau32;
2226 } u;
2227 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2228
2229#ifdef VBOX_STRICT
2230 /*
2231 * Some sanity checks.
2232 */
2233 switch (pUserPage->enmKind)
2234 {
2235 case PGMPOOLKIND_ROOT_32BIT_PD:
2236            Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2237 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2238 break;
2239 case PGMPOOLKIND_ROOT_PAE_PD:
2240            Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2241 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2242 break;
2243 case PGMPOOLKIND_ROOT_PDPTR:
2244 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2245 Assert(pUser->iUserTable < 4);
2246 break;
2247 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2248 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2249 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2250 break;
2251 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2252 case PGMPOOLKIND_ROOT_PML4:
2253 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2254 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2255 break;
2256 default:
2257 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2258 break;
2259 }
2260#endif /* VBOX_STRICT */
2261
2262 /*
2263 * Clear the entry in the user page.
2264 */
2265 switch (pUserPage->enmKind)
2266 {
2267 /* 32-bit entries */
2268 case PGMPOOLKIND_ROOT_32BIT_PD:
2269 u.pau32[pUser->iUserTable] = 0;
2270 break;
2271
2272 /* 64-bit entries */
2273 case PGMPOOLKIND_ROOT_PAE_PD:
2274 case PGMPOOLKIND_ROOT_PDPTR:
2275 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2276 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2277 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2278 case PGMPOOLKIND_ROOT_PML4:
2279 u.pau64[pUser->iUserTable] = 0;
2280 break;
2281
2282 default:
2283 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2284 }
2285}
2286
2287
2288/**
2289 * Clears all users of a page.
2290 */
2291static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2292{
2293 /*
2294 * Free all the user records.
2295 */
2296 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2297 uint16_t i = pPage->iUserHead;
2298 while (i != NIL_PGMPOOL_USER_INDEX)
2299 {
2300        /* Clear entry in user table. */
2301 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2302
2303 /* Free it. */
2304 const uint16_t iNext = paUsers[i].iNext;
2305 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2306 paUsers[i].iNext = pPool->iUserFreeHead;
2307 pPool->iUserFreeHead = i;
2308
2309 /* Next. */
2310 i = iNext;
2311 }
2312 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2313}
2314
2315
2316#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2317/**
2318 * Allocates a new physical cross reference extent.
2319 *
2320 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2321 * @param pVM The VM handle.
2322 * @param piPhysExt Where to store the phys ext index.
2323 */
2324PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2325{
2326 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2327 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2328 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2329 {
2330 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2331 return NULL;
2332 }
2333 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2334 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2335 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2336 *piPhysExt = iPhysExt;
2337 return pPhysExt;
2338}
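
/*
 * Editor's sketch (hypothetical, not part of the original source): the
 * alloc/free pairing for extents. A NULL return means tracking must degrade
 * to the MM_RAM_FLAGS_IDX_OVERFLOWED state, as pgmPoolTrackPhysExtAddref()
 * below demonstrates. iShwPT is an assumed caller-provided shadow PT index.
 */
#if 0
    uint16_t iPhysExt;
    PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
    if (pPhysExt)
    {
        pPhysExt->aidx[0] = iShwPT;             /* record the first reference. */
        pgmPoolTrackPhysExtFree(pVM, iPhysExt); /* and return it when done. */
    }
#endif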
2339
2340
2341/**
2342 * Frees a physical cross reference extent.
2343 *
2344 * @param pVM The VM handle.
2345 * @param iPhysExt The extent to free.
2346 */
2347void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2348{
2349 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2350 Assert(iPhysExt < pPool->cMaxPhysExts);
2351 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2352 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2353 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2354 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2355 pPool->iPhysExtFreeHead = iPhysExt;
2356}
2357
2358
2359/**
2360 * Frees a list of physical cross reference extents.
2361 *
2362 * @param pVM The VM handle.
2363 * @param iPhysExt The index of the head of the extent list to free.
2364 */
2365void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2366{
2367 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2368
2369 const uint16_t iPhysExtStart = iPhysExt;
2370 PPGMPOOLPHYSEXT pPhysExt;
2371 do
2372 {
2373 Assert(iPhysExt < pPool->cMaxPhysExts);
2374 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2375 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2376 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2377
2378 /* next */
2379 iPhysExt = pPhysExt->iNext;
2380 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2381
2382 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2383 pPool->iPhysExtFreeHead = iPhysExtStart;
2384}
2385
2386/**
2387 * Insert a reference into a list of physical cross reference extents.
2388 *
2389 * @returns The new ram range flags (top 16-bits).
2390 *
2391 * @param pVM The VM handle.
2392 * @param iPhysExt The physical extent index of the list head.
2393 * @param iShwPT The shadow page table index.
2394 *
2395 */
2396static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2397{
2398 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2399 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2400
2401 /* special common case. */
2402 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2403 {
2404 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2405 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2406 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2407 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2408 }
2409
2410 /* general treatment. */
2411 const uint16_t iPhysExtStart = iPhysExt;
2412 unsigned cMax = 15;
2413 for (;;)
2414 {
2415 Assert(iPhysExt < pPool->cMaxPhysExts);
2416 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2417 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2418 {
2419 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2420 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2421 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2422 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2423 }
2424 if (!--cMax)
2425 {
2426 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2427 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2428 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2429 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2430        }

        /* Advance to the next extent in the chain; when the chain is
           exhausted, fall through and allocate a new extent below. */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
2431    }
2432
2433 /* add another extent to the list. */
2434 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2435 if (!pNew)
2436 {
2437 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2438 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2439 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2440 }
2441 pNew->iNext = iPhysExtStart;
2442 pNew->aidx[0] = iShwPT;
2443 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2444 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2445}
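
/*
 * Editor's sketch (hypothetical, not part of the original source): what the
 * 16-bit value returned above packs. The CREFS field is forced to
 * MM_RAM_FLAGS_CREFS_PHYSEXT while the IDX field carries either the extent
 * list head or MM_RAM_FLAGS_IDX_OVERFLOWED.
 */
#if 0
    uint16_t u16 = pgmPoolTrackPhysExtInsert(pVM, iPhysExt, iShwPT);
    Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == MM_RAM_FLAGS_CREFS_PHYSEXT);
    const uint16_t iIdx = u16 & MM_RAM_FLAGS_IDX_MASK; /* list head index or OVERFLOWED. */
    NOREF(iIdx);
#endif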
2446
2447
2448/**
2449 * Adds a reference to a guest physical page where extents are in use.
2450 *
2451 * @returns The new ram range flags (top 16-bits).
2452 *
2453 * @param pVM The VM handle.
2454 * @param u16 The ram range flags (top 16-bits).
2455 * @param iShwPT The shadow page table index.
2456 */
2457uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2458{
2459 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2460 {
2461 /*
2462 * Convert to extent list.
2463 */
2464 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2465 uint16_t iPhysExt;
2466 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2467 if (pPhysExt)
2468 {
2469 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2470 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2471 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2472 pPhysExt->aidx[1] = iShwPT;
2473 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2474 }
2475 else
2476 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2477 }
2478 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2479 {
2480 /*
2481 * Insert into the extent list.
2482 */
2483 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2484 }
2485 else
2486 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2487 return u16;
2488}
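
/*
 * Editor's sketch (hypothetical, not part of the original source): merging
 * the top-16-bit value returned by pgmPoolTrackPhysExtAddref() back into an
 * aHCPhys ram-range entry; the mask/shift usage follows
 * pgmPoolTrackPhysExtDerefGCPhys() below. pRam, iPage and iShwPT are assumed.
 */
#if 0
    uint16_t u16 = pgmPoolTrackPhysExtAddref(pVM, (uint16_t)(pRam->aHCPhys[iPage] >> MM_RAM_FLAGS_IDX_SHIFT), iShwPT);
    pRam->aHCPhys[iPage] = (pRam->aHCPhys[iPage] & MM_RAM_FLAGS_NO_REFS_MASK)
                         | ((uint64_t)u16 << MM_RAM_FLAGS_IDX_SHIFT);
#endif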
2489
2490
2491/**
2492 * Clear references to guest physical memory.
2493 *
2494 * @param pPool The pool.
2495 * @param pPage The page.
2496 * @param pHCPhys Pointer to the aHCPhys entry in the ram range.
2497 */
2498void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PRTHCPHYS pHCPhys)
2499{
2500 const unsigned cRefs = *pHCPhys >> MM_RAM_FLAGS_CREFS_SHIFT;
2501 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2502
2503 uint16_t iPhysExt = (*pHCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2504 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2505 {
2506 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2507 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2508 do
2509 {
2510 Assert(iPhysExt < pPool->cMaxPhysExts);
2511
2512 /*
2513 * Look for the shadow page and check if it's all freed.
2514 */
2515 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2516 {
2517 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2518 {
2519 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2520
2521 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2522 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2523 {
2524 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2525 return;
2526 }
2527
2528 /* we can free the node. */
2529 PVM pVM = pPool->CTXSUFF(pVM);
2530 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2531 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2532 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2533 {
2534 /* lonely node */
2535 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2536 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d lonely\n", *pHCPhys, pPage->idx));
2537 *pHCPhys &= MM_RAM_FLAGS_NO_REFS_MASK;
2538 }
2539 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2540 {
2541 /* head */
2542 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d head\n", *pHCPhys, pPage->idx));
2543 *pHCPhys = (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK)
2544 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2545 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2546 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2547 }
2548 else
2549 {
2550 /* in list */
2551 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64 idx=%d\n", *pHCPhys, pPage->idx));
2552 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2553 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2554 }
2555 iPhysExt = iPhysExtNext;
2556 return;
2557 }
2558 }
2559
2560 /* next */
2561 iPhysExtPrev = iPhysExt;
2562 iPhysExt = paPhysExts[iPhysExt].iNext;
2563 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2564
2565 AssertFatalMsgFailed(("not-found! cRefs=%d *pHCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, *pHCPhys, pPage, pPage->idx));
2566 }
2567 else /* nothing to do */
2568 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: *pHCPhys=%RX64\n", *pHCPhys));
2569}
2570
2571
2572
2573/**
2574 * Clear references to guest physical memory.
2575 *
2576 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2577 * is assumed to be correct, so the linear search can be skipped and we can assert
2578 * at an earlier point.
2579 *
2580 * @param pPool The pool.
2581 * @param pPage The page.
2582 * @param HCPhys The host physical address corresponding to the guest page.
2583 * @param GCPhys The guest physical address corresponding to HCPhys.
2584 */
2585static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2586{
2587 /*
2588 * Walk range list.
2589 */
2590 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2591 while (pRam)
2592 {
2593 RTGCPHYS off = GCPhys - pRam->GCPhys;
2594 if (off < pRam->cb)
2595 {
2596 /* does it match? */
2597 const unsigned iPage = off >> PAGE_SHIFT;
2598 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2599 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2600 {
2601 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2602 return;
2603 }
2604 break;
2605 }
2606 pRam = CTXSUFF(pRam->pNext);
2607 }
2608 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2609}
2610
2611
2612/**
2613 * Clear references to guest physical memory.
2614 *
2615 * @param pPool The pool.
2616 * @param pPage The page.
2617 * @param HCPhys The host physical address corresponding to the guest page.
2618 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2619 */
2620static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2621{
2622 /*
2623 * Walk range list.
2624 */
2625 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2626 while (pRam)
2627 {
2628 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2629 if (off < pRam->cb)
2630 {
2631 /* does it match? */
2632 const unsigned iPage = off >> PAGE_SHIFT;
2633 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2634 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2635 {
2636 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2637 return;
2638 }
2639 break;
2640 }
2641 pRam = CTXSUFF(pRam->pNext);
2642 }
2643
2644 /*
2645 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2646 */
2647 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2648 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2649 while (pRam)
2650 {
2651 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2652 while (iPage-- > 0)
2653 {
2654 Assert(pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK);
2655 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
2656 {
2657 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2658 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2659 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aHCPhys[iPage]);
2660 return;
2661 }
2662 }
2663 pRam = CTXSUFF(pRam->pNext);
2664 }
2665
2666 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2667}
2668
2669
2670/**
2671 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2672 *
2673 * @param pPool The pool.
2674 * @param pPage The page.
2675 * @param pShwPT The shadow page table (mapping of the page).
2676 * @param pGstPT The guest page table.
2677 */
2678DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2679{
2680 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2681 if (pShwPT->a[i].n.u1Present)
2682 {
2683 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2684 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2685 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2686 if (!--pPage->cPresent)
2687 break;
2688 }
2689}
2690
2691
2692/**
2693 * Clear references to guest physical memory in a PAE / 32-bit page table.
2694 *
2695 * @param pPool The pool.
2696 * @param pPage The page.
2697 * @param pShwPT The shadow page table (mapping of the page).
2698 * @param pGstPT The guest page table (just a half one).
2699 */
2700DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2701{
2702 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2703 if (pShwPT->a[i].n.u1Present)
2704 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2705}
2706
2707
2708/**
2709 * Clear references to guest physical memory in a PAE / PAE page table.
2710 *
2711 * @param pPool The pool.
2712 * @param pPage The page.
2713 * @param pShwPT The shadow page table (mapping of the page).
2714 * @param pGstPT The guest page table.
2715 */
2716DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2717{
2718 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2719 if (pShwPT->a[i].n.u1Present)
2720 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2721}
2722
2723
2724/**
2725 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2726 *
2727 * @param pPool The pool.
2728 * @param pPage The page.
2729 * @param pShwPT The shadow page table (mapping of the page).
2731 */
2732DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2733{
2734 RTGCPHYS GCPhys = pPage->GCPhys;
2735 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2736 if (pShwPT->a[i].n.u1Present)
2737 {
2738 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2739 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2740 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2741 }
2742}
2743
2744
2745/**
2746 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2747 *
2748 * @param pPool The pool.
2749 * @param pPage The page.
2750 * @param pShwPT The shadow page table (mapping of the page).
2751 */
2752DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2753{
2754 RTGCPHYS GCPhys = pPage->GCPhys;
2755 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2756 if (pShwPT->a[i].n.u1Present)
2757 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2758}
2759#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2760
2761/**
2762 * Clear references to shadowed pages in a PAE page directory.
2763 *
2764 * @param pPool The pool.
2765 * @param pPage The page.
2766 * @param pShwPD The shadow page directory (mapping of the page).
2767 */
2768DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2769{
2770 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2771 {
2772 if (pShwPD->a[i].n.u1Present)
2773 {
2774 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2775 if (pSubPage)
2776 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2777 else
2778 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2779 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2780 }
2781 }
2782}
2783
2784
2785/**
2786 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2787 *
2788 * @param pPool The pool.
2789 * @param pPage The page.
2790 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2791 */
2792DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2793{
2794 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2795 {
2796 if (pShwPdPtr->a[i].n.u1Present)
2797 {
2798 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2799 if (pSubPage)
2800 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2801 else
2802 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2803 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2804 }
2805 }
2806}
2807
2808
2809/**
2810 * Clears all references made by this page.
2811 *
2812 * This includes other shadow pages and GC physical addresses.
2813 *
2814 * @param pPool The pool.
2815 * @param pPage The page.
2816 */
2817static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2818{
2819 /*
2820 * Map the shadow page and take action according to the page kind.
2821 */
2822 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2823 switch (pPage->enmKind)
2824 {
2825#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2826 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2827 {
2828 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2829 void *pvGst;
2830 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2831 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2832 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2833 break;
2834 }
2835
2836 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2837 {
2838 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2839 void *pvGst;
2840 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2841 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2842 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2843 break;
2844 }
2845
2846 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2847 {
2848 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2849 void *pvGst;
2850 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2851 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2852 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2853 break;
2854 }
2855
2856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2857 {
2858 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2859 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2860 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2861 break;
2862 }
2863
2864 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2865 {
2866 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2867 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2868 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2869 break;
2870 }
2871
2872#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2873 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2874 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2875 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2876 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2877 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2878 break;
2879#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2880
2881 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2882 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2883 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2884 break;
2885
2886 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2887 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2888 break;
2889
2890 default:
2891 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2892 }
2893
2894    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
2895 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2896 ASMMemZeroPage(pvShw);
2897 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2898 pPage->fZeroed = true;
2899}
2900#endif /* PGMPOOL_WITH_USER_TRACKING */
2901
2902
2903/**
2904 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2905 *
2906 * @param pPool The pool.
2907 */
2908static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2909{
2910 /*
2911     * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
2912 */
2913 Assert(NIL_PGMPOOL_IDX == 0);
2914 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2915 {
2916 /*
2917 * Get the page address.
2918 */
2919 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2920 union
2921 {
2922 uint64_t *pau64;
2923 uint32_t *pau32;
2924 } u;
2925 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2926
2927 /*
2928 * Mark stuff not present.
2929 */
2930 switch (pPage->enmKind)
2931 {
2932 case PGMPOOLKIND_ROOT_32BIT_PD:
2933 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2934 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2935 u.pau32[iPage] = 0;
2936 break;
2937
2938 case PGMPOOLKIND_ROOT_PAE_PD:
2939 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2940 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2941 u.pau64[iPage] = 0;
2942 break;
2943
2944 case PGMPOOLKIND_ROOT_PML4:
2945 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2946 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2947 u.pau64[iPage] = 0;
2948 break;
2949
2950 case PGMPOOLKIND_ROOT_PDPTR:
2951 /* Not root of shadowed pages currently, ignore it. */
2952 break;
2953 }
2954 }
2955
2956 /*
2957 * Paranoia (to be removed), flag a global CR3 sync.
2958 */
2959 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
2960}
2961
2962
2963/**
2964 * Flushes the entire cache.
2965 *
2966 * It will set the global CR3 sync force-action flag (FF) and assumes the caller
2967 * is aware of this and will execute the CR3 flush.
2968 *
2969 * @param pPool The pool.
2970 */
2971static void pgmPoolFlushAllInt(PPGMPOOL pPool)
2972{
2973 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
2974 LogFlow(("pgmPoolFlushAllInt:\n"));
2975
2976 /*
2977 * If there are no pages in the pool, there is nothing to do.
2978 */
2979 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
2980 {
2981 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
2982 return;
2983 }
2984
2985 /*
2986 * Nuke the free list and reinsert all pages into it.
2987 */
2988 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
2989 {
2990 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2991
2992#ifdef IN_RING3
2993 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
2994#endif
2995#ifdef PGMPOOL_WITH_MONITORING
2996 if (pPage->fMonitored)
2997 pgmPoolMonitorFlush(pPool, pPage);
2998 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2999 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3000 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3001 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3002 pPage->cModifications = 0;
3003#endif
3004 pPage->GCPhys = NIL_RTGCPHYS;
3005 pPage->enmKind = PGMPOOLKIND_FREE;
3006 Assert(pPage->idx == i);
3007 pPage->iNext = i + 1;
3008 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3009 pPage->fSeenNonGlobal = false;
3010        pPage->fMonitored = false;
3011 pPage->fCached = false;
3012 pPage->fReusedFlushPending = false;
3013 pPage->fCR3Mix = false;
3014#ifdef PGMPOOL_WITH_USER_TRACKING
3015 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3016#endif
3017#ifdef PGMPOOL_WITH_CACHE
3018 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3019 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3020#endif
3021 }
3022 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3023 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3024 pPool->cUsedPages = 0;
3025
3026#ifdef PGMPOOL_WITH_USER_TRACKING
3027 /*
3028 * Zap and reinitialize the user records.
3029 */
3030 pPool->cPresent = 0;
3031 pPool->iUserFreeHead = 0;
3032 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3033 const unsigned cMaxUsers = pPool->cMaxUsers;
3034 for (unsigned i = 0; i < cMaxUsers; i++)
3035 {
3036 paUsers[i].iNext = i + 1;
3037 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3038 paUsers[i].iUserTable = 0xfffe;
3039 }
3040 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3041#endif
3042
3043#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3044 /*
3045 * Clear all the GCPhys links and rebuild the phys ext free list.
3046 */
3047 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3048 pRam;
3049 pRam = pRam->CTXSUFF(pNext))
3050 {
3051 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3052 while (iPage-- > 0)
3053 pRam->aHCPhys[iPage] &= MM_RAM_FLAGS_NO_REFS_MASK;
3054 }
3055
3056 pPool->iPhysExtFreeHead = 0;
3057 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3058 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3059 for (unsigned i = 0; i < cMaxPhysExts; i++)
3060 {
3061 paPhysExts[i].iNext = i + 1;
3062 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3063 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3064 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3065 }
3066 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3067#endif
3068
3069#ifdef PGMPOOL_WITH_MONITORING
3070 /*
3071 * Just zap the modified list.
3072 */
3073 pPool->cModifiedPages = 0;
3074 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3075#endif
3076
3077#ifdef PGMPOOL_WITH_CACHE
3078 /*
3079 * Clear the GCPhys hash and the age list.
3080 */
3081 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3082 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3083 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3084 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3085#endif
3086
3087 /*
3088 * Flush all the special root pages.
3089 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3090 */
3091 pgmPoolFlushAllSpecialRoots(pPool);
3092 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3093 {
3094 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3095 pPage->iNext = NIL_PGMPOOL_IDX;
3096#ifdef PGMPOOL_WITH_MONITORING
3097 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3098 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3099 pPage->cModifications = 0;
3100 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3101 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3102 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3103 if (pPage->fMonitored)
3104 {
3105 PVM pVM = pPool->CTXSUFF(pVM);
3106 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3107 pPool->pfnAccessHandlerR3, MMHyper2HC(pVM, (uintptr_t)pPage),
3108 pPool->pfnAccessHandlerR0, MMHyper2HC(pVM, (uintptr_t)pPage),
3109 pPool->pfnAccessHandlerGC, MMHyper2GC(pVM, (uintptr_t)pPage),
3110 pPool->pszAccessHandler);
3111 AssertFatalRCSuccess(rc);
3112# ifdef PGMPOOL_WITH_CACHE
3113 pgmPoolHashInsert(pPool, pPage);
3114# endif
3115 }
3116#endif
3117#ifdef PGMPOOL_WITH_USER_TRACKING
3118 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3119#endif
3120#ifdef PGMPOOL_WITH_CACHE
3121 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3122 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3123#endif
3124 }
3125
3126 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3127}
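
/*
 * Editor's sketch (hypothetical, not part of the original source): the caller
 * contract spelled out above - after an internal flush the global CR3 sync
 * force-action flag is set and must eventually be serviced. VM_FF_ISSET is
 * assumed to be the era's FF query macro.
 */
#if 0
    pgmPoolFlushAllInt(pPool);
    Assert(VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3));
#endif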
3128
3129
3130/**
3131 * Flushes a pool page.
3132 *
3133 * This moves the page to the free list after removing all user references to it.
3134 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3135 *
3136 * @returns VBox status code.
3137 * @retval VINF_SUCCESS on success.
3138 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3139 * @param pPool The pool.
3140 * @param pPage The shadow page.
3141 */
3142int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3143{
3144 int rc = VINF_SUCCESS;
3145 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3146 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3147 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3148
3149 /*
3150 * Quietly reject any attempts at flushing any of the special root pages.
3151 */
3152 if (pPage->idx < PGMPOOL_IDX_FIRST)
3153 {
3154        Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3155 return VINF_SUCCESS;
3156 }
3157
3158 /*
3159     * Mark the page as being in need of an ASMMemZeroPage().
3160 */
3161 pPage->fZeroed = false;
3162
3163#ifdef PGMPOOL_WITH_USER_TRACKING
3164 /*
3165 * Clear the page.
3166 */
3167 pgmPoolTrackClearPageUsers(pPool, pPage);
3168 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3169 pgmPoolTrackDeref(pPool, pPage);
3170 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3171#endif
3172
3173#ifdef PGMPOOL_WITH_CACHE
3174 /*
3175 * Flush it from the cache.
3176 */
3177 pgmPoolCacheFlushPage(pPool, pPage);
3178#endif /* PGMPOOL_WITH_CACHE */
3179
3180#ifdef PGMPOOL_WITH_MONITORING
3181 /*
3182     * Deregister the monitoring.
3183 */
3184 if (pPage->fMonitored)
3185 rc = pgmPoolMonitorFlush(pPool, pPage);
3186#endif
3187
3188 /*
3189 * Free the page.
3190 */
3191 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3192 pPage->iNext = pPool->iFreeHead;
3193 pPool->iFreeHead = pPage->idx;
3194 pPage->enmKind = PGMPOOLKIND_FREE;
3195 pPage->GCPhys = NIL_RTGCPHYS;
3196 pPage->fReusedFlushPending = false;
3197
3198 pPool->cUsedPages--;
3199 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3200 return rc;
3201}
3202
3203
3204/**
3205 * Frees a usage of a pool page.
3206 *
3207 * The caller is responsible for updating the user table so that it no longer
3208 * references the shadow page.
3209 *
3210 * @param pPool The pool.
3211 * @param pPage The shadow page.
3212 * @param iUser The shadow page pool index of the user table.
3213 * @param iUserTable The index into the user table (shadowed).
3214 */
3215void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3216{
3217 STAM_PROFILE_START(&pPool->StatFree, a);
3218 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3219 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3220 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3221#ifdef PGMPOOL_WITH_USER_TRACKING
3222 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3223#endif
3224#ifdef PGMPOOL_WITH_CACHE
3225 if (!pPage->fCached)
3226#endif
3227 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3228 STAM_PROFILE_STOP(&pPool->StatFree, a);
3229}
3230
3231
3232/**
3233 * Makes one or more pages free, growing the pool if possible.
3234 *
3235 * @returns VBox status code.
3236 * @retval VINF_SUCCESS on success.
3237 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3238 *
3239 * @param pPool The pool.
3240 * @param iUser The user of the page.
3241 */
3242static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3243{
3244 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3245
3246 /*
3247     * If the pool isn't fully grown yet, expand it.
3248 */
3249 if (pPool->cCurPages < pPool->cMaxPages)
3250 {
3251 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3252#ifdef IN_RING3
3253 int rc = PGMR3PoolGrow(pPool->pVMHC);
3254#else
3255 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3256#endif
3257 if (VBOX_FAILURE(rc))
3258 return rc;
3259 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3260 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3261 return VINF_SUCCESS;
3262 }
3263
3264#ifdef PGMPOOL_WITH_CACHE
3265 /*
3266 * Free one cached page.
3267 */
3268 return pgmPoolCacheFreeOne(pPool, iUser);
3269#else
3270 /*
3271 * Flush the pool.
3272 * If we have tracking enabled, it should be possible to come up with
3273 * a cheap replacement strategy...
3274 */
3275 pgmPoolFlushAllInt(pPool);
3276 return VERR_PGM_POOL_FLUSHED;
3277#endif
3278}
3279
3280
3281/**
3282 * Allocates a page from the pool.
3283 *
3284 * This page may actually be a cached page and not in need of any processing
3285 * on the caller's part.
3286 *
3287 * @returns VBox status code.
3288 * @retval VINF_SUCCESS if a NEW page was allocated.
3289 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3290 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3291 * @param pVM The VM handle.
3292 * @param GCPhys The GC physical address of the page we're gonna shadow.
3293 * For 4MB and 2MB PD entries, it's the first address the
3294 * shadow PT is covering.
3295 * @param enmKind The kind of mapping.
3296 * @param iUser The shadow page pool index of the user table.
3297 * @param iUserTable The index into the user table (shadowed).
3298 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3299 */
3300int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3301{
3302 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3303 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3304 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3305
3306 *ppPage = NULL;
3307
3308#ifdef PGMPOOL_WITH_CACHE
3309 if (pPool->fCacheEnabled)
3310 {
3311 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3312 if (VBOX_SUCCESS(rc2))
3313 {
3314 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3315 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3316 return rc2;
3317 }
3318 }
3319#endif
3320
3321 /*
3322 * Allocate a new one.
3323 */
3324 int rc = VINF_SUCCESS;
3325 uint16_t iNew = pPool->iFreeHead;
3326 if (iNew == NIL_PGMPOOL_IDX)
3327 {
3328 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3329 if (VBOX_FAILURE(rc))
3330 {
3331 if (rc != VERR_PGM_POOL_CLEARED)
3332 {
3333 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3334 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3335 return rc;
3336 }
3337 rc = VERR_PGM_POOL_FLUSHED;
3338 }
3339 iNew = pPool->iFreeHead;
3340 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3341 }
3342
3343 /* unlink the free head */
3344 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3345 pPool->iFreeHead = pPage->iNext;
3346 pPage->iNext = NIL_PGMPOOL_IDX;
3347
3348 /*
3349 * Initialize it.
3350 */
3351 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3352 pPage->enmKind = enmKind;
3353 pPage->GCPhys = GCPhys;
3354 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3355 pPage->fMonitored = false;
3356 pPage->fCached = false;
3357 pPage->fReusedFlushPending = false;
3358 pPage->fCR3Mix = false;
3359#ifdef PGMPOOL_WITH_MONITORING
3360 pPage->cModifications = 0;
3361 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3362 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3363#endif
3364#ifdef PGMPOOL_WITH_USER_TRACKING
3365 pPage->cPresent = 0;
3366 pPage->iFirstPresent = ~0;
3367
3368 /*
3369 * Insert into the tracking and cache. If this fails, free the page.
3370 */
3371 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3372 if (VBOX_FAILURE(rc3))
3373 {
3374 if (rc3 != VERR_PGM_POOL_CLEARED)
3375 {
3376 pPool->cUsedPages--;
3377 pPage->enmKind = PGMPOOLKIND_FREE;
3378 pPage->GCPhys = NIL_RTGCPHYS;
3379 pPage->iNext = pPool->iFreeHead;
3380 pPool->iFreeHead = pPage->idx;
3381 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3382 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3383 return rc3;
3384 }
3385 rc = VERR_PGM_POOL_FLUSHED;
3386 }
3387#endif /* PGMPOOL_WITH_USER_TRACKING */
3388
3389 /*
3390 * Commit the allocation, clear the page and return.
3391 */
3392#ifdef VBOX_WITH_STATISTICS
3393 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3394 pPool->cUsedPagesHigh = pPool->cUsedPages;
3395#endif
3396
3397 if (!pPage->fZeroed)
3398 {
3399 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3400 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3401 ASMMemZeroPage(pv);
3402 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3403 }
3404
3405 *ppPage = pPage;
3406 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3407 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3408 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3409 return rc;
3410}
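
/*
 * Editor's sketch (hypothetical, not part of the original source): a typical
 * allocation of a shadow page table for a 32-bit guest PT, distinguishing a
 * cache hit from a fresh page. The PGMPOOL_IDX_PD user index, GCPhysPT and
 * iPDE are illustrative assumptions.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT,
                          PGMPOOL_IDX_PD, iPDE, &pShwPage);
    if (rc == VINF_PGM_CACHED_PAGE)
    {   /* cache hit: the shadow PT content is already valid. */   }
    else if (VBOX_SUCCESS(rc))
    {   /* fresh, zeroed page: populate it (SyncPT). */            }
    else
        AssertMsg(rc == VERR_PGM_POOL_FLUSHED, ("%Vrc\n", rc));
#endif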
3411
3412
3413/**
3414 * Frees a usage of a pool page.
3415 *
3416 * @param pVM The VM handle.
3417 * @param HCPhys The HC physical address of the shadow page.
3418 * @param iUser The shadow page pool index of the user table.
3419 * @param iUserTable The index into the user table (shadowed).
3420 */
3421void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3422{
3423 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3424 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3425 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3426}
3427
3428
3429/**
3430 * Gets an in-use page in the pool by its physical address.
3431 *
3432 * @returns Pointer to the page.
3433 * @param pVM The VM handle.
3434 * @param HCPhys The HC physical address of the shadow page.
3435 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3436 */
3437PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3438{
3439 /** @todo profile this! */
3440 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3441 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3442 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3443 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3444 return pPage;
3445}
3446
3447
3448/**
3449 * Flushes the entire cache.
3450 *
3451 * It will set the global CR3 sync force-action flag (FF) and assumes the caller
3452 * is aware of this and will execute the CR3 flush.
3453 *
3454 * @param pPool The pool.
3455 */
3456void pgmPoolFlushAll(PVM pVM)
3457{
3458 LogFlow(("pgmPoolFlushAll:\n"));
3459 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3460}
3461