VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@5392

Last change on this file since 5392 was 5392, checked in by vboxsync, 17 years ago

Flush a page when we see MOVNTI or MOVNTDQ.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 120.2 KB
1/* $Id: PGMAllPool.cpp 5392 2007-10-19 16:58:43Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_GC
28# include <VBox/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vm.h>
32#include <VBox/disopcode.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37
38
39/*******************************************************************************
40* Internal Functions *
41*******************************************************************************/
42__BEGIN_DECLS
43static void pgmPoolFlushAllInt(PPGMPOOL pPool);
44#ifdef PGMPOOL_WITH_USER_TRACKING
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
48#endif
49#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
50static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
51#endif
52#ifdef PGMPOOL_WITH_CACHE
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
54#endif
55#ifdef PGMPOOL_WITH_MONITORING
56static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
57#endif
58#ifndef IN_RING3
59DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
60#endif
61__END_DECLS
62
63
64/**
65 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
66 *
67 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
68 * @param enmKind The page kind.
69 */
70DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
71{
72 switch (enmKind)
73 {
74 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
75 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
76 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
77 return true;
78 default:
79 return false;
80 }
81}
82
83
84#ifdef IN_GC
85/**
86 * Maps a pool page into the current context.
87 *
88 * @returns Pointer to the mapping.
89 * @param pVM The VM handle.
90 * @param pPage The page to map.
91 */
92void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
93{
94 /* general pages. */
95 if (pPage->idx >= PGMPOOL_IDX_FIRST)
96 {
97 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
98 void *pv;
99 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
100 AssertReleaseRC(rc);
101 return pv;
102 }
103
104 /* special pages. */
105 switch (pPage->idx)
106 {
107 case PGMPOOL_IDX_PD:
108 return pVM->pgm.s.pGC32BitPD;
109 case PGMPOOL_IDX_PAE_PD:
110 return pVM->pgm.s.apGCPaePDs[0];
111 case PGMPOOL_IDX_PDPTR:
112 return pVM->pgm.s.pGCPaePDPTR;
113 case PGMPOOL_IDX_PML4:
114 return pVM->pgm.s.pGCPaePML4;
115 default:
116 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
117 return NULL;
118 }
119}
120#endif /* IN_GC */
121
122
123#ifdef PGMPOOL_WITH_MONITORING
124/**
125 * Determine the size of a write instruction.
126 * @returns number of bytes written.
127 * @param pDis The disassembler state.
128 */
129static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
130{
131 /*
132 * This is very crude and possibly wrong for some opcodes,
133 * but since it's not really supposed to be called we can
134 * probably live with that.
135 */
136 return DISGetParamSize(pDis, &pDis->param1);
137}
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 /*
150 * Find the list head.
151 */
152 uint16_t idx = pPage->idx;
153 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
154 {
155 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 idx = pPage->iMonitoredPrev;
158 Assert(idx != pPage->idx);
159 pPage = &pPool->aPages[idx];
160 }
161 }
162
163 /*
164 * Iterate the list, flushing each shadow page.
165 */
166 int rc = VINF_SUCCESS;
167 for (;;)
168 {
169 idx = pPage->iMonitoredNext;
170 Assert(idx != pPage->idx);
171 if (pPage->idx >= PGMPOOL_IDX_FIRST)
172 {
173 int rc2 = pgmPoolFlushPage(pPool, pPage);
174 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
175 rc = VINF_PGM_SYNC_CR3;
176 }
177 /* next */
178 if (idx == NIL_PGMPOOL_IDX)
179 break;
180 pPage = &pPool->aPages[idx];
181 }
182 return rc;
183}
184
185
186/**
187 * Wrapper for getting the current context pointer to the entry being modified.
188 *
189 * @returns Pointer to the current context mapping of the entry.
190 * @param pPool The pool.
191 * @param pvFault The fault virtual address.
192 * @param GCPhysFault The fault physical address.
193 * @param cbEntry The entry size.
194 */
195#ifdef IN_RING3
196DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
197#else
198DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
199#endif
200{
201#ifdef IN_GC
202 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
203
204#elif defined(IN_RING0)
205 void *pvRet;
206 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
207 AssertFatalRCSuccess(rc);
208 return pvRet;
209
210#elif defined(IN_RING3)
211 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
212#else
213# error "huh?"
214#endif
215}
216
217
218/**
219 * Process shadow entries before they are changed by the guest.
220 *
221 * For PT entries we will clear them. For PD entries, we'll simply check
222 * for mapping conflicts and set the SyncCR3 FF if found.
223 *
224 * @param pPool The pool.
225 * @param pPage The head page.
226 * @param GCPhysFault The guest physical fault address.
227 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
228 * In R3 this is the host context 'fault' address.
229 * @param pCpu The disassembler state for figuring out the write size.
230 * This need not be specified if the caller knows we won't do cross entry accesses.
231 */
232#ifdef IN_RING3
233void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
234#else
235void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
236#endif
237{
238 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
239 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
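/* Walk the chain of shadow pages sharing this monitor, zapping or checking the entry hit by the guest write in each of them. */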
240 for (;;)
241 {
242 union
243 {
244 void *pv;
245 PX86PT pPT;
246 PX86PTPAE pPTPae;
247 PX86PD pPD;
248 PX86PDPAE pPDPae;
249 } uShw;
250 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
251
252 switch (pPage->enmKind)
253 {
254 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
255 {
256 const unsigned iShw = off / sizeof(X86PTE);
257 if (uShw.pPT->a[iShw].n.u1Present)
258 {
259# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
260 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
261 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
262 pgmPoolTracDerefGCPhysHint(pPool, pPage,
263 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
264 pGstPte->u & X86_PTE_PG_MASK);
265# endif
266 uShw.pPT->a[iShw].u = 0;
267 }
268 break;
269 }
270
271 /* page/2 sized */
272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
273 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
274 {
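/* A PAE shadow PT covers only half of a 32-bit guest PT; the XOR test above verifies the write lands in the half this shadow page covers. */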
275 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
279 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
280 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
281 pgmPoolTracDerefGCPhysHint(pPool, pPage,
282 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
283 pGstPte->u & X86_PTE_PG_MASK);
284# endif
285 uShw.pPTPae->a[iShw].u = 0;
286 }
287 }
288 break;
289
290 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
291 {
292 const unsigned iShw = off / sizeof(X86PTPAE);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PAE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 break;
305 }
306
307 case PGMPOOLKIND_ROOT_32BIT_PD:
308 {
309 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
310 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
311 {
312 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
313 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
315 }
316 /* paranoia / a bit assumptive. */
317 else if ( pCpu
318 && (off & 4)
319 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
320 {
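/* The write starts in the upper dword of an 8-byte aligned pair and is wide enough to spill into the following 32-bit PDE, so check that entry for a mapping conflict too. */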
321 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
322 if ( iShw2 != iShw
323 && iShw2 < ELEMENTS(uShw.pPD->a)
324 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
325 {
326 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
327 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
328 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
329 }
330 }
331#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
332 if ( uShw.pPD->a[iShw].n.u1Present
333 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
334 {
335 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
336# ifdef IN_GC /* TLB load - we're pushing things a bit... */
337 ASMProbeReadByte(pvAddress);
338# endif
339 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
340 uShw.pPD->a[iShw].u = 0;
341 }
342#endif
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_PAE_PD:
347 {
348 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
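/* Each 32-bit guest PDE is shadowed by two PAE PDEs, hence the doubling above and the two-iteration loop below. */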
349 for (unsigned i = 0; i < 2; i++, iShw++)
350 {
351 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
352 {
353 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
354 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
355 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
356 }
357 /* paranoia / a bit assumptive. */
358 else if ( pCpu
359 && (off & 4)
360 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
361 {
362 const unsigned iShw2 = iShw + 2;
363 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
364 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
367 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
369 }
370 }
371#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
372 if ( uShw.pPDPae->a[iShw].n.u1Present
373 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
376# ifdef IN_GC /* TLB load - we're pushing things a bit... */
377 ASMProbeReadByte(pvAddress);
378# endif
379 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
380 uShw.pPDPae->a[iShw].u = 0;
381 }
382#endif
383 }
384 break;
385 }
386
387 default:
388 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
389 }
390
391 /* next */
392 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
393 return;
394 pPage = &pPool->aPages[pPage->iMonitoredNext];
395 }
396}
397
398
399# ifndef IN_RING3
400/**
401 * Checks if an access could be a fork operation in progress.
402 *
403 * Meaning that the guest is setting up the parent process for Copy-On-Write.
404 *
405 * @returns true if it's likely that we're forking, otherwise false.
406 * @param pPool The pool.
407 * @param pCpu The disassembled instruction.
408 * @param offFault The access offset.
409 */
410DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
411{
412 /*
413 * i386 linux is using btr to clear X86_PTE_RW.
414 * The functions involved are (2.6.16 source inspection):
415 * clear_bit
416 * ptep_set_wrprotect
417 * copy_one_pte
418 * copy_pte_range
419 * copy_pmd_range
420 * copy_pud_range
421 * copy_page_range
422 * dup_mmap
423 * dup_mm
424 * copy_mm
425 * copy_process
426 * do_fork
427 */
428 if ( pCpu->pCurInstr->opcode == OP_BTR
429 && !(offFault & 4)
430 /** @todo Validate that the bit index is X86_PTE_RW. */
431 )
432 {
433 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
434 return true;
435 }
436 return false;
437}
438
439
440/**
441 * Determine whether the page is likely to have been reused.
442 *
443 * @returns true if we consider the page as being reused for a different purpose.
444 * @returns false if we consider it to still be a paging page.
445 * @param pPage The page in question.
446 * @param pCpu The disassembly info for the faulting instruction.
447 * @param pvFault The fault address.
448 *
449 * @remark The REP prefix check is left to the caller because of STOSD/W.
450 */
451DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
452{
453 switch (pCpu->pCurInstr->opcode)
454 {
455 case OP_PUSH:
456 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
457 return true;
458 case OP_PUSHF:
459 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
460 return true;
461 case OP_PUSHA:
462 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
463 return true;
464 case OP_FXSAVE:
465 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
466 return true;
467 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
468 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
469 return true;
470 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
471 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
472 return true;
473 }
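/* A write addressed through ESP (stack pushes, call frames) indicates the page is most likely being reused as a stack rather than as a page table. */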
474 if ( (pCpu->param1.flags & USE_REG_GEN32)
475 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
476 {
477 Log4(("pgmPoolMonitorIsReused: ESP\n"));
478 return true;
479 }
480
481 //if (pPage->fCR3Mix)
482 // return false;
483 return false;
484}
485
486
487/**
488 * Flushes the page being accessed.
489 *
490 * @returns VBox status code suitable for scheduling.
491 * @param pVM The VM handle.
492 * @param pPool The pool.
493 * @param pPage The pool page (head).
494 * @param pCpu The disassembly of the write instruction.
495 * @param pRegFrame The trap register frame.
496 * @param GCPhysFault The fault address as guest physical address.
497 * @param pvFault The fault address.
498 */
499static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
500 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
501{
502 /*
503 * First, do the flushing.
504 */
505 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
506
507 /*
508 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
509 */
510 uint32_t cbWritten;
511 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
512 if (VBOX_SUCCESS(rc2))
513 pRegFrame->eip += pCpu->opsize;
514 else if (rc2 == VERR_EM_INTERPRETER)
515 {
516#ifdef IN_GC
517 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
518 {
519 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
520 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
521 rc = VINF_SUCCESS;
522 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
523 }
524 else
525#endif
526 {
527 rc = VINF_EM_RAW_EMULATE_INSTR;
528 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
529 }
530 }
531 else
532 rc = rc2;
533
534 /* See use in pgmPoolAccessHandlerSimple(). */
535 PGM_INVL_GUEST_TLBS();
536
537 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
538 return rc;
539
540}
541
542
543/**
544 * Handles the STOSD write accesses.
545 *
546 * @returns VBox status code suitable for scheduling.
547 * @param pVM The VM handle.
548 * @param pPool The pool.
549 * @param pPage The pool page (head).
550 * @param pCpu The disassembly of the write instruction.
551 * @param pRegFrame The trap register frame.
552 * @param GCPhysFault The fault address as guest physical address.
553 * @param pvFault The fault address.
554 */
555DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
556 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
557{
558 /*
559 * Increment the modification counter and insert it into the list
560 * of modified pages the first time.
561 */
562 if (!pPage->cModifications++)
563 pgmPoolMonitorModifiedInsert(pPool, pPage);
564
565 /*
566 * Execute REP STOSD.
567 *
568 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
569 * write situation, meaning that it's safe to write here.
570 */
571#ifdef IN_GC
572 uint32_t *pu32 = (uint32_t *)pvFault;
573#else
574 RTGCPTR pu32 = pvFault;
575#endif
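/* Replay the REP STOSD one dword at a time: notify the monitor chain about each store, then perform it (directly through the faulting address in GC, via PGMPhysWriteGCPhys elsewhere). */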
576 while (pRegFrame->ecx)
577 {
578 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
579#ifdef IN_GC
580 *pu32++ = pRegFrame->eax;
581#else
582 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
583 pu32 += 4;
584#endif
585 GCPhysFault += 4;
586 pRegFrame->edi += 4;
587 pRegFrame->ecx--;
588 }
589 pRegFrame->eip += pCpu->opsize;
590
591 /* See use in pgmPoolAccessHandlerSimple(). */
592 PGM_INVL_GUEST_TLBS();
593
594 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
595 return VINF_SUCCESS;
596}
597
598
599/**
600 * Handles the simple write accesses.
601 *
602 * @returns VBox status code suitable for scheduling.
603 * @param pVM The VM handle.
604 * @param pPool The pool.
605 * @param pPage The pool page (head).
606 * @param pCpu The disassembly of the write instruction.
607 * @param pRegFrame The trap register frame.
608 * @param GCPhysFault The fault address as guest physical address.
609 * @param pvFault The fault address.
610 */
611DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
612 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
613{
614 /*
615 * Increment the modification counter and insert it into the list
616 * of modified pages the first time.
617 */
618 if (!pPage->cModifications++)
619 pgmPoolMonitorModifiedInsert(pPool, pPage);
620
621 /*
622 * Clear all the pages. ASSUMES that pvFault is readable.
623 */
624 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
625
626 /*
627 * Interpret the instruction.
628 */
629 uint32_t cb;
630 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
631 if (VBOX_SUCCESS(rc))
632 pRegFrame->eip += pCpu->opsize;
633 else if (rc == VERR_EM_INTERPRETER)
634 {
635# ifdef IN_GC
636 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
637 {
638 /* We're not able to handle this in ring-3, so fix the interpreter! */
639 /** @note Should be fine. There's no need to flush the whole thing. */
640#ifndef DEBUG_sandervl
641 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
642 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
643#endif
644 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
645 rc = pgmPoolMonitorChainFlush(pPool, pPage);
646 }
647 else
648# endif
649 {
650 rc = VINF_EM_RAW_EMULATE_INSTR;
651 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
652 }
653 }
654
655 /*
656 * Quick hack: with logging enabled we're getting stale
657 * code TLBs but no data TLB for EIP and a crash in EMInterpretDisasOne.
658 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
659 * have to be fixed to support this. But that'll have to wait till next week.
660 *
661 * An alternative is to keep track of the changed PTEs together with the
662 * GCPhys from the guest PT. This may prove expensive though.
663 *
664 * At the moment, it's VITAL that it's done AFTER interpreting the instruction
665 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
666 */
667 PGM_INVL_GUEST_TLBS();
668
669 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
670 return rc;
671}
672
673
674/**
675 * \#PF Handler callback for PT write accesses.
676 *
677 * @returns VBox status code (appropriate for GC return).
678 * @param pVM VM Handle.
679 * @param uErrorCode CPU Error code.
680 * @param pRegFrame Trap register frame.
681 * NULL on DMA and other non CPU access.
682 * @param pvFault The fault address (cr2).
683 * @param GCPhysFault The GC physical address corresponding to pvFault.
684 * @param pvUser User argument.
685 */
686DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
687{
688 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
689 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
690 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
691 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
692
693 /*
694 * We should ALWAYS have the list head as user parameter. This
695 * is because we use that page to record the changes.
696 */
697 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
698
699 /*
700 * Disassemble the faulting instruction.
701 */
702 DISCPUSTATE Cpu;
703 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
704 AssertRCReturn(rc, rc);
705
706 /*
707 * Check if it's worth dealing with.
708 */
709 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
710 || pPage->fCR3Mix)
711 && !pgmPoolMonitorIsReused(pPage, &Cpu, pvFault)
712 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
713 {
714 /*
715 * Simple instructions, no REP prefix.
716 */
717 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
718 {
719 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
720 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
721 return rc;
722 }
723
724 /*
725 * Windows is frequently doing small memset() operations (netio test 4k+).
726 * We have to deal with these or we'll kill the cache and performance.
727 */
728 if ( Cpu.pCurInstr->opcode == OP_STOSWD
729 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
730 && pRegFrame->ecx <= 0x20
731 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
732 && !((uintptr_t)pvFault & 3)
733 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
734 && Cpu.mode == CPUMODE_32BIT
735 && Cpu.opmode == CPUMODE_32BIT
736 && Cpu.addrmode == CPUMODE_32BIT
737 && Cpu.prefix == PREFIX_REP
738 && !pRegFrame->eflags.Bits.u1DF
739 )
740 {
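/* Safe fast path: guest ring-0, flat 32-bit REP STOSD, dword aligned, forward direction and confined to a single page. */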
741 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
742 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
743 return rc;
744 }
745
746 /* REP prefix, don't bother. */
747 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
748 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
749 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
750 }
751
752 /*
753 * Not worth it, so flush it.
754 */
755 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
756 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
757 return rc;
758}
759
760# endif /* !IN_RING3 */
761#endif /* PGMPOOL_WITH_MONITORING */
762
763
764
765#ifdef PGMPOOL_WITH_CACHE
766/**
767 * Inserts a page into the GCPhys hash table.
768 *
769 * @param pPool The pool.
770 * @param pPage The page.
771 */
772DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
773{
774 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
775 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
776 pPage->iNext = pPool->aiHash[iHash];
777 pPool->aiHash[iHash] = pPage->idx;
778}
779
780
781/**
782 * Removes a page from the GCPhys hash table.
783 *
784 * @param pPool The pool.
785 * @param pPage The page.
786 */
787DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
788{
789 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
790 if (pPool->aiHash[iHash] == pPage->idx)
791 pPool->aiHash[iHash] = pPage->iNext;
792 else
793 {
794 uint16_t iPrev = pPool->aiHash[iHash];
795 for (;;)
796 {
797 const int16_t i = pPool->aPages[iPrev].iNext;
798 if (i == pPage->idx)
799 {
800 pPool->aPages[iPrev].iNext = pPage->iNext;
801 break;
802 }
803 if (i == NIL_PGMPOOL_IDX)
804 {
805 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
806 break;
807 }
808 iPrev = i;
809 }
810 }
811 pPage->iNext = NIL_PGMPOOL_IDX;
812}
813
814
815/**
816 * Frees up one cache page.
817 *
818 * @returns VBox status code.
819 * @retval VINF_SUCCESS on success.
820 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
821 * @param pPool The pool.
822 * @param iUser The user index.
823 */
824static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
825{
826 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
827 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
828
829 /*
830 * Select one page from the tail of the age list.
831 */
832 uint16_t iToFree = pPool->iAgeTail;
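/* Never evict the page holding the caller's own user table (iUser); fall back to the next oldest entry in that case. */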
833 if (iToFree == iUser)
834 iToFree = pPool->aPages[iToFree].iAgePrev;
835/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
836 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
837 {
838 uint16_t i = pPool->aPages[iToFree].iAgePrev;
839 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
840 {
841 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
842 continue;
843 iToFree = i;
844 break;
845 }
846 }
847*/
848 Assert(iToFree != iUser);
849 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
850
851 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
852 if (rc == VINF_SUCCESS)
853 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
854 return rc;
855}
856
857
858/**
859 * Checks if a kind mismatch is really a page being reused
860 * or if it's just normal remappings.
861 *
862 * @returns true if reused and the cached page (enmKind1) should be flushed
863 * @returns false if not reused.
864 * @param enmKind1 The kind of the cached page.
865 * @param enmKind2 The kind of the requested page.
866 */
867static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
868{
869 switch (enmKind1)
870 {
871 /*
872 * Never reuse them. There is no remapping in non-paging mode.
873 */
874 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
875 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
876 return true;
877
878 /*
879 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
880 */
881 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
882 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
883 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
885 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
886 switch (enmKind2)
887 {
888 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
889 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
890 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
891 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
892 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
893 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
894 return true;
895 default:
896 return false;
897 }
898
899 /*
900 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
901 */
902 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
903 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
904 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
905 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
906 switch (enmKind2)
907 {
908 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
909 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
910 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
911 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
912 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
913 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
914 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
915 return true;
916 default:
917 return false;
918 }
919
920 /*
921 * These cannot be flushed, and it's common to reuse the PDs as PTs.
922 */
923 case PGMPOOLKIND_ROOT_32BIT_PD:
924 case PGMPOOLKIND_ROOT_PAE_PD:
925 case PGMPOOLKIND_ROOT_PDPTR:
926 case PGMPOOLKIND_ROOT_PML4:
927 return false;
928
929 default:
930 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
931 }
932}
933
934
935/**
936 * Attempts to satisfy a pgmPoolAlloc request from the cache.
937 *
938 * @returns VBox status code.
939 * @retval VINF_PGM_CACHED_PAGE on success.
940 * @retval VERR_FILE_NOT_FOUND if not found.
941 * @param pPool The pool.
942 * @param GCPhys The GC physical address of the page we're gonna shadow.
943 * @param enmKind The kind of mapping.
944 * @param iUser The shadow page pool index of the user table.
945 * @param iUserTable The index into the user table (shadowed).
946 * @param ppPage Where to store the pointer to the page.
947 */
948static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
949{
950 /*
951 * Look up the GCPhys in the hash.
952 */
953 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
954 if (i != NIL_PGMPOOL_IDX)
955 {
956 do
957 {
958 PPGMPOOLPAGE pPage = &pPool->aPages[i];
959 if (pPage->GCPhys == GCPhys)
960 {
961 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
962 {
963 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
964 if (VBOX_SUCCESS(rc))
965 {
966 *ppPage = pPage;
967 STAM_COUNTER_INC(&pPool->StatCacheHits);
968 return VINF_PGM_CACHED_PAGE;
969 }
970 return rc;
971 }
972
973 /*
974 * The kind is different. In some cases we should now flush the page
975 * as it has been reused, but in most cases this is normal remapping
976 * of PDs as PT or big pages using the GCPhys field in a slightly
977 * different way than the other kinds.
978 */
979 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
980 {
981 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
982 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
983 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
984 break;
985 }
986 }
987
988 /* next */
989 i = pPage->iNext;
990 } while (i != NIL_PGMPOOL_IDX);
991 }
992
993 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
994 STAM_COUNTER_INC(&pPool->StatCacheMisses);
995 return VERR_FILE_NOT_FOUND;
996}
997
998
999/**
1000 * Inserts a page into the cache.
1001 *
1002 * @param pPool The pool.
1003 * @param pPage The cached page.
1004 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1005 */
1006static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1007{
1008 /*
1009 * Insert into the GCPhys hash if the page is fit for that.
1010 */
1011 Assert(!pPage->fCached);
1012 if (fCanBeCached)
1013 {
1014 pPage->fCached = true;
1015 pgmPoolHashInsert(pPool, pPage);
1016 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1017 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1018 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1019 }
1020 else
1021 {
1022 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1023 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1024 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1025 }
1026
1027 /*
1028 * Insert at the head of the age list.
1029 */
1030 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1031 pPage->iAgeNext = pPool->iAgeHead;
1032 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1033 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1034 else
1035 pPool->iAgeTail = pPage->idx;
1036 pPool->iAgeHead = pPage->idx;
1037}
1038
1039
1040/**
1041 * Flushes a cached page.
1042 *
1043 * @param pPool The pool.
1044 * @param pPage The cached page.
1045 */
1046static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1047{
1048 /*
1049 * Remove the page from the hash.
1050 */
1051 if (pPage->fCached)
1052 {
1053 pPage->fCached = false;
1054 pgmPoolHashRemove(pPool, pPage);
1055 }
1056 else
1057 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1058
1059 /*
1060 * Remove it from the age list.
1061 */
1062 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1063 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1064 else
1065 pPool->iAgeTail = pPage->iAgePrev;
1066 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1067 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1068 else
1069 pPool->iAgeHead = pPage->iAgeNext;
1070 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1071 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1072}
1073#endif /* PGMPOOL_WITH_CACHE */
1074
1075
1076#ifdef PGMPOOL_WITH_MONITORING
1077/**
1078 * Looks for pages sharing the monitor.
1079 *
1080 * @returns Pointer to the head page.
1081 * @returns NULL if not found.
1082 * @param pPool The Pool
1083 * @param pNewPage The page which is going to be monitored.
1084 */
1085static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1086{
1087#ifdef PGMPOOL_WITH_CACHE
1088 /*
1089 * Look up the GCPhys in the hash.
1090 */
1091 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1092 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1093 if (i == NIL_PGMPOOL_IDX)
1094 return NULL;
1095 do
1096 {
1097 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1098 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1099 && pPage != pNewPage)
1100 {
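/* The unsigned subtraction above is < PAGE_SIZE only when pPage->GCPhys lies within the same 4KB guest frame as the new page. */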
1101 switch (pPage->enmKind)
1102 {
1103 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1104 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1105 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1106 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1107 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1108 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1109 case PGMPOOLKIND_ROOT_32BIT_PD:
1110 case PGMPOOLKIND_ROOT_PAE_PD:
1111 case PGMPOOLKIND_ROOT_PDPTR:
1112 case PGMPOOLKIND_ROOT_PML4:
1113 {
1114 /* find the head */
1115 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1116 {
1117 Assert(pPage->iMonitoredPrev != pPage->idx);
1118 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1119 }
1120 return pPage;
1121 }
1122
1123 /* ignore, no monitoring. */
1124 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1125 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1126 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1127 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1128 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1129 break;
1130 default:
1131 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1132 }
1133 }
1134
1135 /* next */
1136 i = pPage->iNext;
1137 } while (i != NIL_PGMPOOL_IDX);
1138#endif
1139 return NULL;
1140}
1141
1142/**
1143 * Enables write monitoring of a guest page.
1144 *
1145 * @returns VBox status code.
1146 * @retval VINF_SUCCESS on success.
1147 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1148 * @param pPool The pool.
1149 * @param pPage The cached page.
1150 */
1151static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1152{
1153 /*
1154 * Filter out the relevant kinds.
1155 */
1156 switch (pPage->enmKind)
1157 {
1158 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1159 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1160 break;
1161
1162 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1163 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1164 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1165 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1166 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1167 /* Nothing to monitor here. */
1168 return VINF_SUCCESS;
1169
1170 case PGMPOOLKIND_ROOT_32BIT_PD:
1171 case PGMPOOLKIND_ROOT_PAE_PD:
1172#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1173 break;
1174#endif
1175 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1176 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1177 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1178 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1179 case PGMPOOLKIND_ROOT_PDPTR:
1180 case PGMPOOLKIND_ROOT_PML4:
1181 default:
1182 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1183 }
1184
1185 /*
1186 * Install handler.
1187 */
1188 int rc;
1189 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1190 if (pPageHead)
1191 {
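/* Another pool page already monitors this guest page: link this page into the existing monitor chain rather than registering a second physical handler. */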
1192 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1193 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1194 pPage->iMonitoredPrev = pPageHead->idx;
1195 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1196 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1197 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1198 pPageHead->iMonitoredNext = pPage->idx;
1199 rc = VINF_SUCCESS;
1200 }
1201 else
1202 {
1203 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1204 PVM pVM = pPool->CTXSUFF(pVM);
1205 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1206 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1207 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1208 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1209 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1210 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
1211 pPool->pszAccessHandler);
1212 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1213 * the heap size should suffice. */
1214 AssertFatalRC(rc);
1215 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1216 rc = VERR_PGM_POOL_CLEARED;
1217 }
1218 pPage->fMonitored = true;
1219 return rc;
1220}
1221
1222
1223/**
1224 * Disables write monitoring of a guest page.
1225 *
1226 * @returns VBox status code.
1227 * @retval VINF_SUCCESS on success.
1228 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1229 * @param pPool The pool.
1230 * @param pPage The cached page.
1231 */
1232static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1233{
1234 /*
1235 * Filter out the relevant kinds.
1236 */
1237 switch (pPage->enmKind)
1238 {
1239 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1241 break;
1242
1243 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1244 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1246 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1247 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1248 /* Nothing to monitor here. */
1249 return VINF_SUCCESS;
1250
1251 case PGMPOOLKIND_ROOT_32BIT_PD:
1252 case PGMPOOLKIND_ROOT_PAE_PD:
1253#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1254 break;
1255#endif
1256 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1257 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1258 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1259 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1260 case PGMPOOLKIND_ROOT_PDPTR:
1261 case PGMPOOLKIND_ROOT_PML4:
1262 default:
1263 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1264 }
1265
1266 /*
1267 * Remove the page from the monitored list or uninstall it if last.
1268 */
1269 const PVM pVM = pPool->CTXSUFF(pVM);
1270 int rc;
1271 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1272 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1273 {
1274 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1275 {
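/* This page is the chain head: promote the next page to head and re-point the physical handler callbacks at it. */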
1276 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1277 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1278 pNewHead->fCR3Mix = pPage->fCR3Mix;
1279 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1280 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1281 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1282 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
1283 pPool->pszAccessHandler);
1284 AssertFatalRCSuccess(rc);
1285 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1286 }
1287 else
1288 {
1289 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1290 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1291 {
1292 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1293 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1294 }
1295 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1296 rc = VINF_SUCCESS;
1297 }
1298 }
1299 else
1300 {
1301 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1302 AssertFatalRC(rc);
1303 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1304 rc = VERR_PGM_POOL_CLEARED;
1305 }
1306 pPage->fMonitored = false;
1307
1308 /*
1309 * Remove it from the list of modified pages (if in it).
1310 */
1311 pgmPoolMonitorModifiedRemove(pPool, pPage);
1312
1313 return rc;
1314}
1315
1316
1317#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1318/**
1319 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1320 *
1321 * @param pPool The Pool.
1322 * @param pPage A page in the chain.
1323 * @param fCR3Mix The new fCR3Mix value.
1324 */
1325static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1326{
1327 /* current */
1328 pPage->fCR3Mix = fCR3Mix;
1329
1330 /* before */
1331 int16_t idx = pPage->iMonitoredPrev;
1332 while (idx != NIL_PGMPOOL_IDX)
1333 {
1334 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1335 idx = pPool->aPages[idx].iMonitoredPrev;
1336 }
1337
1338 /* after */
1339 idx = pPage->iMonitoredNext;
1340 while (idx != NIL_PGMPOOL_IDX)
1341 {
1342 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1343 idx = pPool->aPages[idx].iMonitoredNext;
1344 }
1345}
1346
1347
1348/**
1349 * Installs or modifies monitoring of a CR3 page (special).
1350 *
1351 * We're pretending the CR3 page is shadowed by the pool so we can use the
1352 * generic mechanisms in detecting chained monitoring. (This also gives us a
1353 * taste of what code changes are required to really pool CR3 shadow pages.)
1354 *
1355 * @returns VBox status code.
1356 * @param pPool The pool.
1357 * @param idxRoot The CR3 (root) page index.
1358 * @param GCPhysCR3 The (new) CR3 value.
1359 */
1360int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1361{
1362 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1363 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1364 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1365 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1366
1367 /*
1368 * The unlikely case where it already matches.
1369 */
1370 if (pPage->GCPhys == GCPhysCR3)
1371 {
1372 Assert(pPage->fMonitored);
1373 return VINF_SUCCESS;
1374 }
1375
1376 /*
1377 * Flush the current monitoring and remove it from the hash.
1378 */
1379 int rc = VINF_SUCCESS;
1380 if (pPage->fMonitored)
1381 {
1382 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1383 rc = pgmPoolMonitorFlush(pPool, pPage);
1384 if (rc == VERR_PGM_POOL_CLEARED)
1385 rc = VINF_SUCCESS;
1386 else
1387 AssertFatalRC(rc);
1388 pgmPoolHashRemove(pPool, pPage);
1389 }
1390
1391 /*
1392 * Monitor the page at the new location and insert it into the hash.
1393 */
1394 pPage->GCPhys = GCPhysCR3;
1395 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1396 if (rc2 != VERR_PGM_POOL_CLEARED)
1397 {
1398 AssertFatalRC(rc2);
1399 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1400 rc = rc2;
1401 }
1402 pgmPoolHashInsert(pPool, pPage);
1403 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1404 return rc;
1405}
1406
1407
1408/**
1409 * Removes the monitoring of a CR3 page (special).
1410 *
1411 * @returns VBox status code.
1412 * @param pPool The pool.
1413 * @param idxRoot The CR3 (root) page index.
1414 */
1415int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1416{
1417 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1418 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1419 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1420 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1421
1422 if (!pPage->fMonitored)
1423 return VINF_SUCCESS;
1424
1425 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1426 int rc = pgmPoolMonitorFlush(pPool, pPage);
1427 if (rc != VERR_PGM_POOL_CLEARED)
1428 AssertFatalRC(rc);
1429 else
1430 rc = VINF_SUCCESS;
1431 pgmPoolHashRemove(pPool, pPage);
1432 Assert(!pPage->fMonitored);
1433 pPage->GCPhys = NIL_RTGCPHYS;
1434 return rc;
1435}
1436#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1437
1438
1439/**
1440 * Inserts the page into the list of modified pages.
1441 *
1442 * @param pPool The pool.
1443 * @param pPage The page.
1444 */
1445void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1446{
1447 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1448 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1449 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1450 && pPool->iModifiedHead != pPage->idx,
1451 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1452 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1453 pPool->iModifiedHead, pPool->cModifiedPages));
1454
1455 pPage->iModifiedNext = pPool->iModifiedHead;
1456 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1457 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1458 pPool->iModifiedHead = pPage->idx;
1459 pPool->cModifiedPages++;
1460#ifdef VBOX_WITH_STATISTICS
1461 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1462 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1463#endif
1464}
1465
1466
1467/**
1468 * Removes the page from the list of modified pages and resets the
1469 * modification counter.
1470 *
1471 * @param pPool The pool.
1472 * @param pPage The page which is believed to be in the list of modified pages.
1473 */
1474static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1475{
1476 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1477 if (pPool->iModifiedHead == pPage->idx)
1478 {
1479 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1480 pPool->iModifiedHead = pPage->iModifiedNext;
1481 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1482 {
1483 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1484 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1485 }
1486 pPool->cModifiedPages--;
1487 }
1488 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1489 {
1490 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1491 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1492 {
1493 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1494 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1495 }
1496 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1497 pPool->cModifiedPages--;
1498 }
1499 else
1500 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1501 pPage->cModifications = 0;
1502}
1503
1504
1505/**
1506 * Zaps the list of modified pages, resetting their modification counters in the process.
1507 *
1508 * @param pVM The VM handle.
1509 */
1510void pgmPoolMonitorModifiedClearAll(PVM pVM)
1511{
1512 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1513 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1514
1515 unsigned cPages = 0; NOREF(cPages);
1516 uint16_t idx = pPool->iModifiedHead;
1517 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1518 while (idx != NIL_PGMPOOL_IDX)
1519 {
1520 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1521 idx = pPage->iModifiedNext;
1522 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1523 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1524 pPage->cModifications = 0;
1525 Assert(++cPages);
1526 }
1527 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1528 pPool->cModifiedPages = 0;
1529}
1530
1531
1532/**
1533 * Clear all shadow pages and clear all modification counters.
1534 *
1535 * @param pVM The VM handle.
1536 * @remark Should only be used when monitoring is available, thus placed in
1537 * the PGMPOOL_WITH_MONITORING #ifdef.
1538 */
1539void pgmPoolClearAll(PVM pVM)
1540{
1541 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1542 STAM_PROFILE_START(&pPool->StatClearAll, c);
1543 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1544
1545 /*
1546 * Iterate all the pages until we've encountered all that are in use.
1547 * This is a simple but not quite optimal solution.
1548 */
1549 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1550 unsigned cLeft = pPool->cUsedPages;
1551 unsigned iPage = pPool->cCurPages;
1552 while (--iPage >= PGMPOOL_IDX_FIRST)
1553 {
1554 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1555 if (pPage->GCPhys != NIL_RTGCPHYS)
1556 {
1557 switch (pPage->enmKind)
1558 {
1559 /*
1560 * We only care about shadow page tables.
1561 */
1562 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1563 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1564 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1565 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1566 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1567 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1568 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1569 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1570 {
1571#ifdef PGMPOOL_WITH_USER_TRACKING
1572 if (pPage->cPresent)
1573#endif
1574 {
1575 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1576 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1577 ASMMemZeroPage(pvShw);
1578 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1579#ifdef PGMPOOL_WITH_USER_TRACKING
1580 pPage->cPresent = 0;
1581 pPage->iFirstPresent = ~0;
1582#endif
1583 }
1584 }
1585 /* fall thru */
1586
1587 default:
1588 Assert(!pPage->cModifications || ++cModifiedPages);
1589 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1590 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1591 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1592 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1593 pPage->cModifications = 0;
1594 break;
1595
1596 }
1597 if (!--cLeft)
1598 break;
1599 }
1600 }
1601
1602 /* sweep the special pages too. */
1603 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1604 {
1605 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1606 if (pPage->GCPhys != NIL_RTGCPHYS)
1607 {
1608 Assert(!pPage->cModifications || ++cModifiedPages);
1609 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1610 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1611 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1612 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1613 pPage->cModifications = 0;
1614 }
1615 }
1616
1617 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1618 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1619 pPool->cModifiedPages = 0;
1620
1621#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1622 /*
1623 * Clear all the GCPhys links and rebuild the phys ext free list.
1624 */
1625 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1626 pRam;
1627 pRam = pRam->CTXSUFF(pNext))
1628 {
1629 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1630 while (iPage-- > 0)
1631 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1632 }
1633
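/* Rebuild the phys ext free list: link each entry to the next, clear its page indexes, and terminate the chain at the last entry. */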
1634 pPool->iPhysExtFreeHead = 0;
1635 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1636 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1637 for (unsigned i = 0; i < cMaxPhysExts; i++)
1638 {
1639 paPhysExts[i].iNext = i + 1;
1640 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1641 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1642 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1643 }
1644 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1645#endif
1646
1647
1648 pPool->cPresent = 0;
1649 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1650}
1651#endif /* PGMPOOL_WITH_MONITORING */
1652
1653
1654#ifdef PGMPOOL_WITH_USER_TRACKING
1655/**
1656 * Frees up at least one user entry.
1657 *
1658 * @returns VBox status code.
1659 * @retval VINF_SUCCESS if successfully added.
1660 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1661 * @param pPool The pool.
1662 * @param iUser The user index.
1663 */
1664static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1665{
1666 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1667#ifdef PGMPOOL_WITH_CACHE
1668 /*
1669 * Just free cached pages in a braindead fashion.
1670 */
1671 /** @todo walk the age list backwards and free the first with usage. */
1672 int rc = VINF_SUCCESS;
1673 do
1674 {
1675 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1676 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1677 rc = rc2;
1678 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1679 return rc;
1680#else
1681 /*
1682 * Lazy approach.
1683 */
1684 pgmPoolFlushAllInt(pPool);
1685 return VERR_PGM_POOL_FLUSHED;
1686#endif
1687}
1688
1689
1690/**
1691 * Inserts a page into the cache.
1692 *
1693 * This will create a user node for the page, insert it into the GCPhys
1694 * hash, and insert it into the age list.
1695 *
1696 * @returns VBox status code.
1697 * @retval VINF_SUCCESS if successfully added.
1698 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1699 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1700 * @param pPool The pool.
1701 * @param pPage The cached page.
1702 * @param GCPhys The GC physical address of the page we're gonna shadow.
1703 * @param iUser The user index.
1704 * @param iUserTable The user table index.
1705 */
1706DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1707{
1708 int rc = VINF_SUCCESS;
1709 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1710
1711 /*
1712 * Find a free user node.
1713 */
1714 uint16_t i = pPool->iUserFreeHead;
1715 if (i == NIL_PGMPOOL_USER_INDEX)
1716 {
1717 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1718 if (VBOX_FAILURE(rc))
1719 return rc;
1720 i = pPool->iUserFreeHead;
1721 }
1722
1723 /*
1724 * Unlink the user node from the free list,
1725 * initialize and insert it into the user list.
1726 */
1727 pPool->iUserFreeHead = pUser[i].iNext;
1728 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1729 pUser[i].iUser = iUser;
1730 pUser[i].iUserTable = iUserTable;
1731 pPage->iUserHead = i;
1732
1733 /*
1734 * Insert into cache and enable monitoring of the guest page if enabled.
1735 *
1736 * Until we implement caching of all levels, including the CR3 one, we'll
1737 * have to make sure we don't try to monitor & cache any recursive reuse of
1738 * a monitored CR3 page. Because all Windows versions are doing this we'll
1739 * have to be able to do combined access monitoring, CR3 + PT and
1740 * PD + PT (guest PAE).
1741 *
1742 * Update:
1743 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1744 */
1745#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1746# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1747 const bool fCanBeMonitored = true;
1748# else
1749 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1750 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1751 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1752# endif
1753# ifdef PGMPOOL_WITH_CACHE
1754 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1755# endif
1756 if (fCanBeMonitored)
1757 {
1758# ifdef PGMPOOL_WITH_MONITORING
1759 rc = pgmPoolMonitorInsert(pPool, pPage);
1760 if (rc == VERR_PGM_POOL_CLEARED)
1761 {
1762 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1763# ifndef PGMPOOL_WITH_CACHE
1764 pgmPoolMonitorFlush(pPool, pPage);
1765 rc = VERR_PGM_POOL_FLUSHED;
1766# endif
1767 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1768 pUser[i].iNext = pPool->iUserFreeHead;
1769 pUser[i].iUser = NIL_PGMPOOL_IDX;
1770 pPool->iUserFreeHead = i;
1771 }
1772# endif
1773 }
1774#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
1775 return rc;
1776}
1777
1778
1779# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1780/**
1781 * Adds a user reference to a page.
1782 *
1783 * This will add a user record for the page and move the page to the
1784 * head of the cache's age list.
1785 *
1786 * @returns VBox status code.
1787 * @retval VINF_SUCCESS if successfully added.
1788 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1789 * @param pPool The pool.
1790 * @param pPage The cached page.
1791 * @param iUser The user index.
1792 * @param iUserTable The user table.
1793 */
1794static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1795{
1796 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1797
1798# ifdef VBOX_STRICT
1799 /*
1800 * Check that the entry doesn't already exist.
1801 */
1802 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1803 {
1804 uint16_t i = pPage->iUserHead;
1805 do
1806 {
1807 Assert(i < pPool->cMaxUsers);
1808 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1809 i = paUsers[i].iNext;
1810 } while (i != NIL_PGMPOOL_USER_INDEX);
1811 }
1812# endif
1813
1814 /*
1815 * Allocate a user node.
1816 */
1817 uint16_t i = pPool->iUserFreeHead;
1818 if (i == NIL_PGMPOOL_USER_INDEX)
1819 {
1820 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1821 if (VBOX_FAILURE(rc))
1822 return rc;
1823 i = pPool->iUserFreeHead;
1824 }
1825 pPool->iUserFreeHead = paUsers[i].iNext;
1826
1827 /*
1828 * Initialize the user node and insert it.
1829 */
1830 paUsers[i].iNext = pPage->iUserHead;
1831 paUsers[i].iUser = iUser;
1832 paUsers[i].iUserTable = iUserTable;
1833 pPage->iUserHead = i;
1834
1835# ifdef PGMPOOL_WITH_CACHE
1836 /*
1837 * Tell the cache to update its replacement stats for this page.
1838 */
1839 pgmPoolCacheUsed(pPool, pPage);
1840# endif
1841 return VINF_SUCCESS;
1842}
1843# endif /* PGMPOOL_WITH_CACHE */
1844
1845
1846/**
1847 * Frees a user record associated with a page.
1848 *
1849 * This does not clear the entry in the user table, it simply returns the
1850 * user record to the chain of free records.
1851 *
1852 * @param pPool The pool.
1853 * @param pPage The shadow page.
1854 * @param iUser The shadow page pool index of the user table.
1855 * @param iUserTable The index into the user table (shadowed).
1856 */
1857static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1858{
1859 /*
1860 * Unlink and free the specified user entry.
1861 */
1862 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1863
1864 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
1865 uint16_t i = pPage->iUserHead;
1866 if ( i != NIL_PGMPOOL_USER_INDEX
1867 && paUsers[i].iUser == iUser
1868 && paUsers[i].iUserTable == iUserTable)
1869 {
1870 pPage->iUserHead = paUsers[i].iNext;
1871
1872 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1873 paUsers[i].iNext = pPool->iUserFreeHead;
1874 pPool->iUserFreeHead = i;
1875 return;
1876 }
1877
1878 /* General: Linear search. */
1879 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1880 while (i != NIL_PGMPOOL_USER_INDEX)
1881 {
1882 if ( paUsers[i].iUser == iUser
1883 && paUsers[i].iUserTable == iUserTable)
1884 {
1885 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1886 paUsers[iPrev].iNext = paUsers[i].iNext;
1887 else
1888 pPage->iUserHead = paUsers[i].iNext;
1889
1890 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1891 paUsers[i].iNext = pPool->iUserFreeHead;
1892 pPool->iUserFreeHead = i;
1893 return;
1894 }
1895 iPrev = i;
1896 i = paUsers[i].iNext;
1897 }
1898
1899 /* Fatal: didn't find it */
1900 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1901 iUser, iUserTable, pPage->GCPhys));
1902}
1903
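/*
 * Illustrative sketch (#if 0'd, not part of the build): the user records are
 * kept as an index based singly linked list; allocation pops the head of the
 * free list and pushes the record onto the page's user list, and freeing does
 * the reverse, just like pgmPoolTrackInsert / pgmPoolTrackFreeUser above.
 * The names EXAMPLENODE, aNodes, piFreeHead, piListHead and EXAMPLE_NIL are
 * made up for this sketch only.
 */
#if 0
typedef struct EXAMPLENODE { uint16_t iNext, iUser, iUserTable; } EXAMPLENODE;
#define EXAMPLE_NIL UINT16_MAX

static uint16_t exampleUserAlloc(EXAMPLENODE *aNodes, uint16_t *piFreeHead, uint16_t *piListHead,
                                 uint16_t iUser, uint16_t iUserTable)
{
    uint16_t i = *piFreeHead;
    if (i == EXAMPLE_NIL)
        return EXAMPLE_NIL;                 /* caller has to free one up first */
    *piFreeHead = aNodes[i].iNext;          /* pop from the free list */
    aNodes[i].iUser = iUser;
    aNodes[i].iUserTable = iUserTable;
    aNodes[i].iNext = *piListHead;          /* push onto the page's user list */
    *piListHead = i;
    return i;
}

static void exampleUserFree(EXAMPLENODE *aNodes, uint16_t *piFreeHead, uint16_t *piListHead, uint16_t i)
{
    *piListHead = aNodes[i].iNext;          /* unlink (head case only, for brevity) */
    aNodes[i].iUser = EXAMPLE_NIL;
    aNodes[i].iNext = *piFreeHead;          /* return the record to the free chain */
    *piFreeHead = i;
}
#endif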
1904
1905/**
1906 * Gets the entry size of a shadow table.
1907 *
1908 * @param enmKind The kind of page.
1909 *
1910 * @returns The size of the entry in bytes. That is, 4 or 8.
1911 * @returns If the kind is not for a table, an assertion is raised and 0 is
1912 * returned.
1913 */
1914DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1915{
1916 switch (enmKind)
1917 {
1918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1919 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1920 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1921 case PGMPOOLKIND_ROOT_32BIT_PD:
1922 return 4;
1923
1924 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1926 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1927 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1928 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1929 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1930 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1931 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1932 case PGMPOOLKIND_ROOT_PAE_PD:
1933 case PGMPOOLKIND_ROOT_PDPTR:
1934 case PGMPOOLKIND_ROOT_PML4:
1935 return 8;
1936
1937 default:
1938 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1939 }
1940}
1941
1942
1943/**
1944 * Gets the entry size of a guest table.
1945 *
1946 * @param enmKind The kind of page.
1947 *
1948 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1949 * @returns If the kind is not for a table, an assertion is raised and 0 is
1950 * returned.
1951 */
1952DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1953{
1954 switch (enmKind)
1955 {
1956 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1958 case PGMPOOLKIND_ROOT_32BIT_PD:
1959 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1960 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1961 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1962 return 4;
1963
1964 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1965 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1966 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1967 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1968 case PGMPOOLKIND_ROOT_PAE_PD:
1969 case PGMPOOLKIND_ROOT_PDPTR:
1970 case PGMPOOLKIND_ROOT_PML4:
1971 return 8;
1972
1973 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1974 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1975 /** @todo can we return 0? (nobody is calling this...) */
1976 return 0;
1977
1978 default:
1979 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1980 }
1981}
1982
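/*
 * Illustrative sketch (#if 0'd, not part of the build): the two helpers above
 * let a caller convert between entry indexes and byte offsets when only the
 * page kind is known. offWrite, iGstEntry, iShwEntry and offShw are example
 * names, not pool members.
 */
#if 0
    /* Which guest table entry does a write at byte offset offWrite hit? */
    const unsigned iGstEntry = offWrite / pgmPoolTrackGetGuestEntrySize((PGMPOOLKIND)pPage->enmKind);
    /* And where does shadow entry iShwEntry live inside the shadow page? */
    const unsigned offShw    = iShwEntry * pgmPoolTrackGetShadowEntrySize((PGMPOOLKIND)pPage->enmKind);
    Assert(offShw < PAGE_SIZE);
#endif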
1983
1984#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1985/**
1986 * Scans one shadow page table for mappings of a physical page.
1987 *
1988 * @param pVM The VM handle.
1989 * @param pPhysPage The guest page in question.
1990 * @param iShw The shadow page table.
1991 * @param cRefs The number of references made in that PT.
1992 */
1993static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
1994{
1995 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
1996 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1997
1998 /*
1999 * Assert sanity.
2000 */
2001 Assert(cRefs == 1);
2002 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2003 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2004
2005 /*
2006 * Then, clear the actual mappings to the page in the shadow PT.
2007 */
2008 switch (pPage->enmKind)
2009 {
2010 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2011 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2012 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2013 {
2014 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2015 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2016 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2017 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2018 {
2019 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2020 pPT->a[i].u = 0;
2021 cRefs--;
2022 if (!cRefs)
2023 return;
2024 }
2025#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2026 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2027 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2028 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2029 {
2030 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2031 pPT->a[i].u = 0;
2032 }
2033#endif
2034 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2035 break;
2036 }
2037
2038 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2039 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2040 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2041 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2042 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2043 {
2044 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2045 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2046 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2047 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2048 {
2049 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2050 pPT->a[i].u = 0;
2051 cRefs--;
2052 if (!cRefs)
2053 return;
2054 }
2055#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2056 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2057 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2058 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2059 {
2060 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2061 pPT->a[i].u = 0;
2062 }
2063#endif
2064 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2065 break;
2066 }
2067
2068 default:
2069 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2070 }
2071}
2072
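/*
 * Illustrative sketch (#if 0'd, not part of the build): the search key above
 * is simply the host physical address of the page with the present bit set,
 * so a single masked compare both checks that a PTE is present and that it
 * maps the page. Written out for one 32-bit entry (i is an example index):
 */
#if 0
    const uint32_t u32Key  = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
    const bool     fMapsIt = (pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32Key;
    if (fMapsIt)
        pPT->a[i].u = 0;    /* drop the shadow mapping of the page */
#endif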
2073
2074/**
2075 * Scans one shadow page table for mappings of a physical page.
2076 *
2077 * @param pVM The VM handle.
2078 * @param pPhysPage The guest page in question.
2079 * @param iShw The shadow page table.
2080 * @param cRefs The number of references made in that PT.
2081 */
2082void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2083{
2084 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2085 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2086 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2087 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2088 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2089 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2090}
2091
2092
2093/**
2094 * Flushes a list of shadow page tables mapping the same physical page.
2095 *
2096 * @param pVM The VM handle.
2097 * @param pPhysPage The guest page in question.
2098 * @param iPhysExt The physical cross reference extent list to flush.
2099 */
2100void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2101{
2102 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2103 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2104 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2105
2106 const uint16_t iPhysExtStart = iPhysExt;
2107 PPGMPOOLPHYSEXT pPhysExt;
2108 do
2109 {
2110 Assert(iPhysExt < pPool->cMaxPhysExts);
2111 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2112 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2113 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2114 {
2115 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2116 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2117 }
2118
2119 /* next */
2120 iPhysExt = pPhysExt->iNext;
2121 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2122
2123 /* insert the list into the free list and clear the ram range entry. */
2124 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2125 pPool->iPhysExtFreeHead = iPhysExtStart;
2126 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2127
2128 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2129}
2130#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2131
2132
2133/**
2134 * Scans all shadow page tables for mappings of a physical page.
2135 *
2136 * This may be slow, but it's most likely more efficient than cleaning
2137 * out the entire page pool / cache.
2138 *
2139 * @returns VBox status code.
2140 * @retval VINF_SUCCESS if all references have been successfully cleared.
2141 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2142 * a page pool cleaning.
2143 *
2144 * @param pVM The VM handle.
2145 * @param pPhysPage The guest page in question.
2146 */
2147int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2148{
2149 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2150 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2151 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2152 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2153
2154#if 1
2155 /*
2156 * There is a limit to what makes sense.
2157 */
2158 if (pPool->cPresent > 1024)
2159 {
2160 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2161 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2162 return VINF_PGM_GCPHYS_ALIASED;
2163 }
2164#endif
2165
2166 /*
2167 * Iterate all the pages until we've encountered all those in use.
2168 * This is a simple but not quite optimal solution.
2169 */
2170 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2171 const uint32_t u32 = u64;
2172 unsigned cLeft = pPool->cUsedPages;
2173 unsigned iPage = pPool->cCurPages;
2174 while (--iPage >= PGMPOOL_IDX_FIRST)
2175 {
2176 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2177 if (pPage->GCPhys != NIL_RTGCPHYS)
2178 {
2179 switch (pPage->enmKind)
2180 {
2181 /*
2182 * We only care about shadow page tables.
2183 */
2184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2185 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2186 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2187 {
2188 unsigned cPresent = pPage->cPresent;
2189 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2190 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2191 if (pPT->a[i].n.u1Present)
2192 {
2193 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2194 {
2195 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2196 pPT->a[i].u = 0;
2197 }
2198 if (!--cPresent)
2199 break;
2200 }
2201 break;
2202 }
2203
2204 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2206 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2207 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2208 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2209 {
2210 unsigned cPresent = pPage->cPresent;
2211 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2212 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2213 if (pPT->a[i].n.u1Present)
2214 {
2215 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2216 {
2217 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2218 pPT->a[i].u = 0;
2219 }
2220 if (!--cPresent)
2221 break;
2222 }
2223 break;
2224 }
2225 }
2226 if (!--cLeft)
2227 break;
2228 }
2229 }
2230
2231 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2232 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2233 return VINF_SUCCESS;
2234}
2235
2236
2237/**
2238 * Clears the user entry in a user table.
2239 *
2240 * This is used to remove all references to a page when flushing it.
2241 */
2242static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2243{
2244 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2245 Assert(pUser->iUser < pPool->cCurPages);
2246
2247 /*
2248 * Map the user page.
2249 */
2250 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2251 union
2252 {
2253 uint64_t *pau64;
2254 uint32_t *pau32;
2255 } u;
2256 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2257
2258#ifdef VBOX_STRICT
2259 /*
2260 * Some sanity checks.
2261 */
2262 switch (pUserPage->enmKind)
2263 {
2264 case PGMPOOLKIND_ROOT_32BIT_PD:
2265 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2266 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2267 break;
2268 case PGMPOOLKIND_ROOT_PAE_PD:
2269 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2270 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2271 break;
2272 case PGMPOOLKIND_ROOT_PDPTR:
2273 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2274 Assert(pUser->iUserTable < 4);
2275 break;
2276 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2277 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2278 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2279 break;
2280 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2281 case PGMPOOLKIND_ROOT_PML4:
2282 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2283 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2284 break;
2285 default:
2286 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2287 break;
2288 }
2289#endif /* VBOX_STRICT */
2290
2291 /*
2292 * Clear the entry in the user page.
2293 */
2294 switch (pUserPage->enmKind)
2295 {
2296 /* 32-bit entries */
2297 case PGMPOOLKIND_ROOT_32BIT_PD:
2298 u.pau32[pUser->iUserTable] = 0;
2299 break;
2300
2301 /* 64-bit entries */
2302 case PGMPOOLKIND_ROOT_PAE_PD:
2303 case PGMPOOLKIND_ROOT_PDPTR:
2304 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2305 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2306 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2307 case PGMPOOLKIND_ROOT_PML4:
2308 u.pau64[pUser->iUserTable] = 0;
2309 break;
2310
2311 default:
2312 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2313 }
2314}
2315
2316
2317/**
2318 * Clears all users of a page.
2319 */
2320static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2321{
2322 /*
2323 * Free all the user records.
2324 */
2325 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2326 uint16_t i = pPage->iUserHead;
2327 while (i != NIL_PGMPOOL_USER_INDEX)
2328 {
2329 /* Clear the entry in the user table. */
2330 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2331
2332 /* Free it. */
2333 const uint16_t iNext = paUsers[i].iNext;
2334 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2335 paUsers[i].iNext = pPool->iUserFreeHead;
2336 pPool->iUserFreeHead = i;
2337
2338 /* Next. */
2339 i = iNext;
2340 }
2341 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2342}
2343
2344
2345#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2346/**
2347 * Allocates a new physical cross reference extent.
2348 *
2349 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2350 * @param pVM The VM handle.
2351 * @param piPhysExt Where to store the phys ext index.
2352 */
2353PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2354{
2355 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2356 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2357 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2358 {
2359 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2360 return NULL;
2361 }
2362 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2363 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2364 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2365 *piPhysExt = iPhysExt;
2366 return pPhysExt;
2367}
2368
2369
2370/**
2371 * Frees a physical cross reference extent.
2372 *
2373 * @param pVM The VM handle.
2374 * @param iPhysExt The extent to free.
2375 */
2376void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2377{
2378 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2379 Assert(iPhysExt < pPool->cMaxPhysExts);
2380 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2381 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2382 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2383 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2384 pPool->iPhysExtFreeHead = iPhysExt;
2385}
2386
2387
2388/**
2389 * Frees a list of physical cross reference extents.
2390 *
2391 * @param pVM The VM handle.
2392 * @param iPhysExt The index of the head of the extent list to free.
2393 */
2394void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2395{
2396 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2397
2398 const uint16_t iPhysExtStart = iPhysExt;
2399 PPGMPOOLPHYSEXT pPhysExt;
2400 do
2401 {
2402 Assert(iPhysExt < pPool->cMaxPhysExts);
2403 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2404 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2405 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2406
2407 /* next */
2408 iPhysExt = pPhysExt->iNext;
2409 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2410
2411 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2412 pPool->iPhysExtFreeHead = iPhysExtStart;
2413}
2414
2415/**
2416 * Insert a reference into a list of physical cross reference extents.
2417 *
2418 * @returns The new ram range flags (top 16-bits).
2419 *
2420 * @param pVM The VM handle.
2421 * @param iPhysExt The physical extent index of the list head.
2422 * @param iShwPT The shadow page table index.
2423 *
2424 */
2425static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2426{
2427 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2428 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2429
2430 /* special common case. */
2431 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2432 {
2433 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2434 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2435 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2436 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2437 }
2438
2439 /* general treatment. */
2440 const uint16_t iPhysExtStart = iPhysExt;
2441 unsigned cMax = 15;
2442 for (;;)
2443 {
2444 Assert(iPhysExt < pPool->cMaxPhysExts);
2445 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2446 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2447 {
2448 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2449 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2450 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2451 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2452 }
2453 if (!--cMax)
2454 {
2455 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2456 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2457 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2458 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2459 }
 /* next extent in the chain. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
2460 }
2461
2462 /* add another extent to the list. */
2463 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2464 if (!pNew)
2465 {
2466 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2467 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2468 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2469 }
2470 pNew->iNext = iPhysExtStart;
2471 pNew->aidx[0] = iShwPT;
2472 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2473 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2474}
2475
2476
2477/**
2478 * Add a reference to guest physical page where extents are in use.
2479 *
2480 * @returns The new ram range flags (top 16-bits).
2481 *
2482 * @param pVM The VM handle.
2483 * @param u16 The ram range flags (top 16-bits).
2484 * @param iShwPT The shadow page table index.
2485 */
2486uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2487{
2488 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2489 {
2490 /*
2491 * Convert to extent list.
2492 */
2493 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2494 uint16_t iPhysExt;
2495 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2496 if (pPhysExt)
2497 {
2498 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2499 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2500 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2501 pPhysExt->aidx[1] = iShwPT;
2502 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2503 }
2504 else
2505 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2506 }
2507 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2508 {
2509 /*
2510 * Insert into the extent list.
2511 */
2512 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2513 }
2514 else
2515 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2516 return u16;
2517}
2518
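/*
 * Illustrative sketch (#if 0'd, not part of the build): the 16-bit tracking
 * value handled above packs two fields of the ram range page flags, a
 * reference marker (CREFS) in the upper bits and an index (IDX) in the lower
 * bits. When CREFS equals MM_RAM_FLAGS_CREFS_PHYSEXT the index is the head of
 * a physical extent list (or MM_RAM_FLAGS_IDX_OVERFLOWED); otherwise it is
 * the index of the single shadow page table referencing the page. Decoding:
 */
#if 0
    const uint16_t uCRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
    const uint16_t uIdx   = u16 & MM_RAM_FLAGS_IDX_MASK;
    if (uCRefs == MM_RAM_FLAGS_CREFS_PHYSEXT)
    {
        if (uIdx != MM_RAM_FLAGS_IDX_OVERFLOWED)
            Log(("head of a PGMPOOLPHYSEXT chain: %d\n", uIdx));
    }
    else
        Log(("single shadow PT index: %d (cRefs=%d)\n", uIdx, uCRefs));
#endif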
2519
2520/**
2521 * Clear references to guest physical memory.
2522 *
2523 * @param pPool The pool.
2524 * @param pPage The page.
2525 * @param pPhysPage Pointer to the aPages entry in the ram range.
2526 */
2527void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2528{
2529 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2530 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2531
2532 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2533 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2534 {
2535 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2536 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2537 do
2538 {
2539 Assert(iPhysExt < pPool->cMaxPhysExts);
2540
2541 /*
2542 * Look for the shadow page and check if it's all freed.
2543 */
2544 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2545 {
2546 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2547 {
2548 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2549
2550 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2551 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2552 {
2553 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2554 return;
2555 }
2556
2557 /* we can free the node. */
2558 PVM pVM = pPool->CTXSUFF(pVM);
2559 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2560 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2561 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2562 {
2563 /* lonely node */
2564 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2565 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2566 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2567 }
2568 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2569 {
2570 /* head */
2571 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2572 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2573 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2574 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2575 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2576 }
2577 else
2578 {
2579 /* in list */
2580 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2581 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2582 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2583 }
2584 iPhysExt = iPhysExtNext;
2585 return;
2586 }
2587 }
2588
2589 /* next */
2590 iPhysExtPrev = iPhysExt;
2591 iPhysExt = paPhysExts[iPhysExt].iNext;
2592 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2593
2594 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2595 }
2596 else /* nothing to do */
2597 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2598}
2599
2600
2601
2602/**
2603 * Clear references to guest physical memory.
2604 *
2605 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2606 * is known to be correct, so the linear search fallback can be skipped and we can assert
2607 * at an earlier point.
2608 *
2609 * @param pPool The pool.
2610 * @param pPage The page.
2611 * @param HCPhys The host physical address corresponding to the guest page.
2612 * @param GCPhys The guest physical address corresponding to HCPhys.
2613 */
2614static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2615{
2616 /*
2617 * Walk range list.
2618 */
2619 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2620 while (pRam)
2621 {
2622 RTGCPHYS off = GCPhys - pRam->GCPhys;
2623 if (off < pRam->cb)
2624 {
2625 /* does it match? */
2626 const unsigned iPage = off >> PAGE_SHIFT;
2627 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2628 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2629 {
2630 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2631 return;
2632 }
2633 break;
2634 }
2635 pRam = CTXSUFF(pRam->pNext);
2636 }
2637 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2638}
2639
2640
2641/**
2642 * Clear references to guest physical memory.
2643 *
2644 * @param pPool The pool.
2645 * @param pPage The page.
2646 * @param HCPhys The host physical address corresponding to the guest page.
2647 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2648 */
2649static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2650{
2651 /*
2652 * Walk range list.
2653 */
2654 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2655 while (pRam)
2656 {
2657 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2658 if (off < pRam->cb)
2659 {
2660 /* does it match? */
2661 const unsigned iPage = off >> PAGE_SHIFT;
2662 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2663 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2664 {
2665 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2666 return;
2667 }
2668 break;
2669 }
2670 pRam = CTXSUFF(pRam->pNext);
2671 }
2672
2673 /*
2674 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2675 */
2676 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2677 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2678 while (pRam)
2679 {
2680 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2681 while (iPage-- > 0)
2682 {
2683 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2684 {
2685 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2686 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2687 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2688 return;
2689 }
2690 }
2691 pRam = CTXSUFF(pRam->pNext);
2692 }
2693
2694 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2695}
2696
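/*
 * Illustrative sketch (#if 0'd, not part of the build): both range walks
 * above boil down to the usual "offset into range" test. For a given guest
 * physical address the matching PGMPAGE entry is located like this
 * (pPageHit is an example name):
 */
#if 0
    PPGMPAGE pPageHit = NULL;
    for (PPGMRAMRANGE pCur = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges); pCur; pCur = CTXSUFF(pCur->pNext))
    {
        RTGCPHYS off = GCPhysHint - pCur->GCPhys;   /* wraps for addresses below the range, so 'off < cb' still rejects them */
        if (off < pCur->cb)
        {
            pPageHit = &pCur->aPages[off >> PAGE_SHIFT];
            break;
        }
    }
#endif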
2697
2698/**
2699 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2700 *
2701 * @param pPool The pool.
2702 * @param pPage The page.
2703 * @param pShwPT The shadow page table (mapping of the page).
2704 * @param pGstPT The guest page table.
2705 */
2706DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2707{
2708 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2709 if (pShwPT->a[i].n.u1Present)
2710 {
2711 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2712 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2713 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2714 if (!--pPage->cPresent)
2715 break;
2716 }
2717}
2718
2719
2720/**
2721 * Clear references to guest physical memory in a PAE / 32-bit page table.
2722 *
2723 * @param pPool The pool.
2724 * @param pPage The page.
2725 * @param pShwPT The shadow page table (mapping of the page).
2726 * @param pGstPT The guest page table (just a half one).
2727 */
2728DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2729{
2730 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2731 if (pShwPT->a[i].n.u1Present)
2732 {
2733 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2734 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2735 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2736 }
2737}
2738
2739
2740/**
2741 * Clear references to guest physical memory in a PAE / PAE page table.
2742 *
2743 * @param pPool The pool.
2744 * @param pPage The page.
2745 * @param pShwPT The shadow page table (mapping of the page).
2746 * @param pGstPT The guest page table.
2747 */
2748DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2749{
2750 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2751 if (pShwPT->a[i].n.u1Present)
2752 {
2753 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
2754 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2755 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2756 }
2757}
2758
2759
2760/**
2761 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2762 *
2763 * @param pPool The pool.
2764 * @param pPage The page.
2765 * @param pShwPT The shadow page table (mapping of the page).
2766 */
2767DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2768{
2769 RTGCPHYS GCPhys = pPage->GCPhys;
2770 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2771 if (pShwPT->a[i].n.u1Present)
2772 {
2773 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2774 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2775 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2776 }
2777}
2778
2779
2780/**
2781 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2782 *
2783 * @param pPool The pool.
2784 * @param pPage The page.
2785 * @param pShwPT The shadow page table (mapping of the page).
2786 */
2787DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2788{
2789 RTGCPHYS GCPhys = pPage->GCPhys;
2790 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2791 if (pShwPT->a[i].n.u1Present)
2792 {
2793 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2794 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2795 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2796 }
2797}
2798#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2799
2800
2801/**
2802 * Clear references to shadowed pages in a PAE page directory.
2803 *
2804 * @param pPool The pool.
2805 * @param pPage The page.
2806 * @param pShwPD The shadow page directory (mapping of the page).
2807 */
2808DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2809{
2810 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2811 {
2812 if (pShwPD->a[i].n.u1Present)
2813 {
2814 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2815 if (pSubPage)
2816 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2817 else
2818 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2819 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2820 }
2821 }
2822}
2823
2824
2825/**
2826 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2827 *
2828 * @param pPool The pool.
2829 * @param pPage The page.
2830 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2831 */
2832DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2833{
2834 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2835 {
2836 if (pShwPdPtr->a[i].n.u1Present)
2837 {
2838 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2839 if (pSubPage)
2840 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2841 else
2842 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2843 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2844 }
2845 }
2846}
2847
2848
2849/**
2850 * Clears all references made by this page.
2851 *
2852 * This includes other shadow pages and GC physical addresses.
2853 *
2854 * @param pPool The pool.
2855 * @param pPage The page.
2856 */
2857static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2858{
2859 /*
2860 * Map the shadow page and take action according to the page kind.
2861 */
2862 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2863 switch (pPage->enmKind)
2864 {
2865#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2867 {
2868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2869 void *pvGst;
2870 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2871 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2872 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2873 break;
2874 }
2875
2876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2877 {
2878 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2879 void *pvGst;
2880 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2881 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2882 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2883 break;
2884 }
2885
2886 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2887 {
2888 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2889 void *pvGst;
2890 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2891 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2892 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2893 break;
2894 }
2895
2896 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2897 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2898 {
2899 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2900 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2901 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2902 break;
2903 }
2904
2905 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2906 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2907 {
2908 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2909 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2910 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2911 break;
2912 }
2913
2914#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2915 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2916 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2917 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2919 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2920 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2921 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2922 break;
2923#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2924
2925 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2926 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2927 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2928 break;
2929
2930 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2931 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2932 break;
2933
2934 default:
2935 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2936 }
2937
2938 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
2939 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2940 ASMMemZeroPage(pvShw);
2941 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2942 pPage->fZeroed = true;
2943}
2944#endif /* PGMPOOL_WITH_USER_TRACKING */
2945
2946
2947/**
2948 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2949 *
2950 * @param pPool The pool.
2951 */
2952static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2953{
2954 /*
2955 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2956 */
2957 Assert(NIL_PGMPOOL_IDX == 0);
2958 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2959 {
2960 /*
2961 * Get the page address.
2962 */
2963 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2964 union
2965 {
2966 uint64_t *pau64;
2967 uint32_t *pau32;
2968 } u;
2969 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2970
2971 /*
2972 * Mark stuff not present.
2973 */
2974 switch (pPage->enmKind)
2975 {
2976 case PGMPOOLKIND_ROOT_32BIT_PD:
2977 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2978 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2979 u.pau32[iPage] = 0;
2980 break;
2981
2982 case PGMPOOLKIND_ROOT_PAE_PD:
2983 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2984 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2985 u.pau64[iPage] = 0;
2986 break;
2987
2988 case PGMPOOLKIND_ROOT_PML4:
2989 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2990 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2991 u.pau64[iPage] = 0;
2992 break;
2993
2994 case PGMPOOLKIND_ROOT_PDPTR:
2995 /* Not root of shadowed pages currently, ignore it. */
2996 break;
2997 }
2998 }
2999
3000 /*
3001 * Paranoia (to be removed), flag a global CR3 sync.
3002 */
3003 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3004}
3005
3006
3007/**
3008 * Flushes the entire cache.
3009 *
3010 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3011 * and will execute the CR3 flush.
3012 *
3013 * @param pPool The pool.
3014 */
3015static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3016{
3017 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3018 LogFlow(("pgmPoolFlushAllInt:\n"));
3019
3020 /*
3021 * If there are no pages in the pool, there is nothing to do.
3022 */
3023 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3024 {
3025 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3026 return;
3027 }
3028
3029 /*
3030 * Nuke the free list and reinsert all pages into it.
3031 */
3032 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3033 {
3034 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3035
3036#ifdef IN_RING3
3037 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3038#endif
3039#ifdef PGMPOOL_WITH_MONITORING
3040 if (pPage->fMonitored)
3041 pgmPoolMonitorFlush(pPool, pPage);
3042 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3043 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3044 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3045 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3046 pPage->cModifications = 0;
3047#endif
3048 pPage->GCPhys = NIL_RTGCPHYS;
3049 pPage->enmKind = PGMPOOLKIND_FREE;
3050 Assert(pPage->idx == i);
3051 pPage->iNext = i + 1;
3052 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3053 pPage->fSeenNonGlobal = false;
3054 pPage->fMonitored = false;
3055 pPage->fCached = false;
3056 pPage->fReusedFlushPending = false;
3057 pPage->fCR3Mix = false;
3058#ifdef PGMPOOL_WITH_USER_TRACKING
3059 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3060#endif
3061#ifdef PGMPOOL_WITH_CACHE
3062 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3063 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3064#endif
3065 }
3066 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3067 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3068 pPool->cUsedPages = 0;
3069
3070#ifdef PGMPOOL_WITH_USER_TRACKING
3071 /*
3072 * Zap and reinitialize the user records.
3073 */
3074 pPool->cPresent = 0;
3075 pPool->iUserFreeHead = 0;
3076 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3077 const unsigned cMaxUsers = pPool->cMaxUsers;
3078 for (unsigned i = 0; i < cMaxUsers; i++)
3079 {
3080 paUsers[i].iNext = i + 1;
3081 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3082 paUsers[i].iUserTable = 0xfffe;
3083 }
3084 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3085#endif
3086
3087#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3088 /*
3089 * Clear all the GCPhys links and rebuild the phys ext free list.
3090 */
3091 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3092 pRam;
3093 pRam = pRam->CTXSUFF(pNext))
3094 {
3095 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3096 while (iPage-- > 0)
3097 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3098 }
3099
3100 pPool->iPhysExtFreeHead = 0;
3101 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3102 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3103 for (unsigned i = 0; i < cMaxPhysExts; i++)
3104 {
3105 paPhysExts[i].iNext = i + 1;
3106 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3107 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3108 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3109 }
3110 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3111#endif
3112
3113#ifdef PGMPOOL_WITH_MONITORING
3114 /*
3115 * Just zap the modified list.
3116 */
3117 pPool->cModifiedPages = 0;
3118 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3119#endif
3120
3121#ifdef PGMPOOL_WITH_CACHE
3122 /*
3123 * Clear the GCPhys hash and the age list.
3124 */
3125 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3126 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3127 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3128 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3129#endif
3130
3131 /*
3132 * Flush all the special root pages.
3133 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3134 */
3135 pgmPoolFlushAllSpecialRoots(pPool);
3136 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3137 {
3138 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3139 pPage->iNext = NIL_PGMPOOL_IDX;
3140#ifdef PGMPOOL_WITH_MONITORING
3141 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3142 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3143 pPage->cModifications = 0;
3144 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3145 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3146 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3147 if (pPage->fMonitored)
3148 {
3149 PVM pVM = pPool->CTXSUFF(pVM);
3150 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3151 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3152 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3153 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3154 pPool->pszAccessHandler);
3155 AssertFatalRCSuccess(rc);
3156# ifdef PGMPOOL_WITH_CACHE
3157 pgmPoolHashInsert(pPool, pPage);
3158# endif
3159 }
3160#endif
3161#ifdef PGMPOOL_WITH_USER_TRACKING
3162 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3163#endif
3164#ifdef PGMPOOL_WITH_CACHE
3165 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3166 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3167#endif
3168 }
3169
3170 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3171}
3172
3173
3174/**
3175 * Flushes a pool page.
3176 *
3177 * This moves the page to the free list after removing all user references to it.
3178 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3179 *
3180 * @returns VBox status code.
3181 * @retval VINF_SUCCESS on success.
3182 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3183 * @param pPool The pool.
3184 * @param pPage The shadow page.
3185 */
3186int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3187{
3188 int rc = VINF_SUCCESS;
3189 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3190 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3191 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3192
3193 /*
3194 * Quietly reject any attempts at flushing any of the special root pages.
3195 */
3196 if (pPage->idx < PGMPOOL_IDX_FIRST)
3197 {
3198 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3199 return VINF_SUCCESS;
3200 }
3201
3202 /*
3203 * Mark the page as being in need of an ASMMemZeroPage().
3204 */
3205 pPage->fZeroed = false;
3206
3207#ifdef PGMPOOL_WITH_USER_TRACKING
3208 /*
3209 * Clear the page.
3210 */
3211 pgmPoolTrackClearPageUsers(pPool, pPage);
3212 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3213 pgmPoolTrackDeref(pPool, pPage);
3214 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3215#endif
3216
3217#ifdef PGMPOOL_WITH_CACHE
3218 /*
3219 * Flush it from the cache.
3220 */
3221 pgmPoolCacheFlushPage(pPool, pPage);
3222#endif /* PGMPOOL_WITH_CACHE */
3223
3224#ifdef PGMPOOL_WITH_MONITORING
3225 /*
3226 * Deregistering the monitoring.
3227 */
3228 if (pPage->fMonitored)
3229 rc = pgmPoolMonitorFlush(pPool, pPage);
3230#endif
3231
3232 /*
3233 * Free the page.
3234 */
3235 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3236 pPage->iNext = pPool->iFreeHead;
3237 pPool->iFreeHead = pPage->idx;
3238 pPage->enmKind = PGMPOOLKIND_FREE;
3239 pPage->GCPhys = NIL_RTGCPHYS;
3240 pPage->fReusedFlushPending = false;
3241
3242 pPool->cUsedPages--;
3243 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3244 return rc;
3245}
3246
3247
3248/**
3249 * Frees a usage of a pool page.
3250 *
3251 * The caller is responsible for updating the user table so that it no longer
3252 * references the shadow page.
3253 *
3254 * @param pPool The pool.
3255 * @param pPage The shadow page.
3256 * @param iUser The shadow page pool index of the user table.
3257 * @param iUserTable The index into the user table (shadowed).
3258 */
3259void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3260{
3261 STAM_PROFILE_START(&pPool->StatFree, a);
3262 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3263 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3264 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3265#ifdef PGMPOOL_WITH_USER_TRACKING
3266 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3267#endif
3268#ifdef PGMPOOL_WITH_CACHE
3269 if (!pPage->fCached)
3270#endif
3271 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3272 STAM_PROFILE_STOP(&pPool->StatFree, a);
3273}
3274
3275
3276/**
3277 * Makes one or more pages free, expanding the pool or evicting cached pages as needed.
3278 *
3279 * @returns VBox status code.
3280 * @retval VINF_SUCCESS on success.
3281 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3282 *
3283 * @param pPool The pool.
3284 * @param iUser The user of the page.
3285 */
3286static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3287{
3288 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3289
3290 /*
3291 * If the pool isn't fully grown yet, expand it.
3292 */
3293 if (pPool->cCurPages < pPool->cMaxPages)
3294 {
3295 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3296#ifdef IN_RING3
3297 int rc = PGMR3PoolGrow(pPool->pVMHC);
3298#else
3299 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3300#endif
3301 if (VBOX_FAILURE(rc))
3302 return rc;
3303 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3304 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3305 return VINF_SUCCESS;
3306 }
3307
3308#ifdef PGMPOOL_WITH_CACHE
3309 /*
3310 * Free one cached page.
3311 */
3312 return pgmPoolCacheFreeOne(pPool, iUser);
3313#else
3314 /*
3315 * Flush the pool.
3316 * If we have tracking enabled, it should be possible to come up with
3317 * a cheap replacement strategy...
3318 */
3319 pgmPoolFlushAllInt(pPool);
3320 return VERR_PGM_POOL_FLUSHED;
3321#endif
3322}
3323
3324
3325/**
3326 * Allocates a page from the pool.
3327 *
3328 * This page may actually be a cached page and not in need of any processing
3329 * on the caller's part.
3330 *
3331 * @returns VBox status code.
3332 * @retval VINF_SUCCESS if a NEW page was allocated.
3333 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3334 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3335 * @param pVM The VM handle.
3336 * @param GCPhys The GC physical address of the page we're gonna shadow.
3337 * For 4MB and 2MB PD entries, it's the first address the
3338 * shadow PT is covering.
3339 * @param enmKind The kind of mapping.
3340 * @param iUser The shadow page pool index of the user table.
3341 * @param iUserTable The index into the user table (shadowed).
3342 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3343 */
3344int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3345{
3346 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3347 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3348 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3349
3350 *ppPage = NULL;
3351
3352#ifdef PGMPOOL_WITH_CACHE
3353 if (pPool->fCacheEnabled)
3354 {
3355 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3356 if (VBOX_SUCCESS(rc2))
3357 {
3358 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3359 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3360 return rc2;
3361 }
3362 }
3363#endif
3364
3365 /*
3366 * Allocate a new one.
3367 */
3368 int rc = VINF_SUCCESS;
3369 uint16_t iNew = pPool->iFreeHead;
3370 if (iNew == NIL_PGMPOOL_IDX)
3371 {
3372 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3373 if (VBOX_FAILURE(rc))
3374 {
3375 if (rc != VERR_PGM_POOL_CLEARED)
3376 {
3377 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3378 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3379 return rc;
3380 }
3381 rc = VERR_PGM_POOL_FLUSHED;
3382 }
3383 iNew = pPool->iFreeHead;
3384 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3385 }
3386
3387 /* unlink the free head */
3388 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3389 pPool->iFreeHead = pPage->iNext;
3390 pPage->iNext = NIL_PGMPOOL_IDX;
3391
3392 /*
3393 * Initialize it.
3394 */
3395 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3396 pPage->enmKind = enmKind;
3397 pPage->GCPhys = GCPhys;
3398 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3399 pPage->fMonitored = false;
3400 pPage->fCached = false;
3401 pPage->fReusedFlushPending = false;
3402 pPage->fCR3Mix = false;
3403#ifdef PGMPOOL_WITH_MONITORING
3404 pPage->cModifications = 0;
3405 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3406 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3407#endif
3408#ifdef PGMPOOL_WITH_USER_TRACKING
3409 pPage->cPresent = 0;
3410 pPage->iFirstPresent = ~0;
3411
3412 /*
3413 * Insert into the tracking and cache. If this fails, free the page.
3414 */
3415 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3416 if (VBOX_FAILURE(rc3))
3417 {
3418 if (rc3 != VERR_PGM_POOL_CLEARED)
3419 {
3420 pPool->cUsedPages--;
3421 pPage->enmKind = PGMPOOLKIND_FREE;
3422 pPage->GCPhys = NIL_RTGCPHYS;
3423 pPage->iNext = pPool->iFreeHead;
3424 pPool->iFreeHead = pPage->idx;
3425 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3426 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3427 return rc3;
3428 }
3429 rc = VERR_PGM_POOL_FLUSHED;
3430 }
3431#endif /* PGMPOOL_WITH_USER_TRACKING */
3432
3433 /*
3434 * Commit the allocation, clear the page and return.
3435 */
3436#ifdef VBOX_WITH_STATISTICS
3437 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3438 pPool->cUsedPagesHigh = pPool->cUsedPages;
3439#endif
3440
3441 if (!pPage->fZeroed)
3442 {
3443 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3444 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3445 ASMMemZeroPage(pv);
3446 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3447 }
3448
3449 *ppPage = pPage;
3450 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3451 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3452 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3453 return rc;
3454}
3455
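/*
 * Illustrative sketch (#if 0'd, not part of the build): a typical caller
 * allocates a shadow table for a guest table, treats VINF_PGM_CACHED_PAGE as
 * "contents already valid", and later drops its usage again. GCPhysGuestPT,
 * iUserIdx and iPdeIndex are example names only.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT, iUserIdx, iPdeIndex, &pShwPage);
    if (rc == VINF_SUCCESS)
    {
        /* New page: it has been zeroed, the caller fills in the shadow entries. */
    }
    else if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* Cached page: the contents are already valid, just link it into the PDE. */
    }
    else
        return rc;  /* e.g. VERR_PGM_POOL_FLUSHED: the pool was flushed and a CR3 sync is pending */

    /* When the PDE is torn down again: */
    pgmPoolFreeByPage(pPool, pShwPage, iUserIdx, iPdeIndex);
#endif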
3456
3457/**
3458 * Frees a usage of a pool page.
3459 *
3460 * @param pVM The VM handle.
3461 * @param HCPhys The HC physical address of the shadow page.
3462 * @param iUser The shadow page pool index of the user table.
3463 * @param iUserTable The index into the user table (shadowed).
3464 */
3465void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3466{
3467 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3468 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3469 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3470}
3471
3472
3473/**
3474 * Gets an in-use page in the pool by its physical address.
3475 *
3476 * @returns Pointer to the page.
3477 * @param pVM The VM handle.
3478 * @param HCPhys The HC physical address of the shadow page.
3479 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3480 */
3481PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3482{
3483 /** @todo profile this! */
3484 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3485 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3486 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3487 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3488 return pPage;
3489}
3490
3491
3492/**
3493 * Flushes the entire cache.
3494 *
3495 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3496 * and will execute the CR3 flush.
3497 *
3498 * @param pVM The VM handle.
3499 */
3500void pgmPoolFlushAll(PVM pVM)
3501{
3502 LogFlow(("pgmPoolFlushAll:\n"));
3503 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3504}
3505