VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 7932

Last change on this file since 7932 was 7929, checked in by vboxsync, 17 years ago

1/* $Id: PGMAllPool.cpp 7929 2008-04-11 16:07:26Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_GC
28# include <VBox/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vm.h>
32#include <VBox/disopcode.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37
38
39/*******************************************************************************
40* Internal Functions *
41*******************************************************************************/
42__BEGIN_DECLS
43static void pgmPoolFlushAllInt(PPGMPOOL pPool);
44#ifdef PGMPOOL_WITH_USER_TRACKING
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
48#endif
49#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
50static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
51#endif
52#ifdef PGMPOOL_WITH_CACHE
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
54#endif
55#ifdef PGMPOOL_WITH_MONITORING
56static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
57#endif
58#ifndef IN_RING3
59DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
60#endif
61__END_DECLS
62
63
64/**
65 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
66 *
67 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
68 * @param enmKind The page kind.
69 */
70DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
71{
72 switch (enmKind)
73 {
74 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
75 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
76 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
77 return true;
78 default:
79 return false;
80 }
81}
82
83
84#ifdef IN_GC
85/**
86 * Maps a pool page into the current context.
87 *
88 * @returns Pointer to the mapping.
89 * @param pVM The VM handle.
90 * @param pPage The page to map.
91 */
92void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
93{
94 /* general pages. */
95 if (pPage->idx >= PGMPOOL_IDX_FIRST)
96 {
97 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
98 void *pv;
99 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
100 AssertReleaseRC(rc);
101 return pv;
102 }
103
104 /* special pages. */
105 switch (pPage->idx)
106 {
107 case PGMPOOL_IDX_PD:
108 return pVM->pgm.s.pGC32BitPD;
109 case PGMPOOL_IDX_PAE_PD:
110 return pVM->pgm.s.apGCPaePDs[0];
111 case PGMPOOL_IDX_PDPT:
112 return pVM->pgm.s.pGCPaePDPT;
113 case PGMPOOL_IDX_PML4:
114 return pVM->pgm.s.pGCPaePML4;
115 default:
116 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
117 return NULL;
118 }
119}
120#endif /* IN_GC */
121
122
123#ifdef PGMPOOL_WITH_MONITORING
124/**
125 * Determine the size of a write instruction.
126 * @returns number of bytes written.
127 * @param pDis The disassembler state.
128 */
129static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
130{
131 /*
132 * This is very crude and possibly wrong for some opcodes,
133 * but since it's not really supposed to be called we can
134 * probably live with that.
135 */
136 return DISGetParamSize(pDis, &pDis->param1);
137}
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 /*
150 * Find the list head.
151 */
152 uint16_t idx = pPage->idx;
153 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
154 {
155 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 idx = pPage->iMonitoredPrev;
158 Assert(idx != pPage->idx);
159 pPage = &pPool->aPages[idx];
160 }
161 }
162
163 /*
164 * Iterate the list, flushing each shadow page.
165 */
166 int rc = VINF_SUCCESS;
167 for (;;)
168 {
169 idx = pPage->iMonitoredNext;
170 Assert(idx != pPage->idx);
171 if (pPage->idx >= PGMPOOL_IDX_FIRST)
172 {
173 int rc2 = pgmPoolFlushPage(pPool, pPage);
174 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
175 rc = VINF_PGM_SYNC_CR3;
176 }
177 /* next */
178 if (idx == NIL_PGMPOOL_IDX)
179 break;
180 pPage = &pPool->aPages[idx];
181 }
182 return rc;
183}
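/* Note: the status folding above turns VERR_PGM_POOL_CLEARED from any page in
 * the chain into VINF_PGM_SYNC_CR3, so the caller reschedules a full shadow
 * page table sync rather than treating the cleared pool as an error. */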
184
185
186/**
187 * Wrapper for getting the current context pointer to the entry being modified.
188 *
189 * @returns Pointer to the current context mapping of the entry.
190 * @param pPool The pool.
191 * @param pvFault The fault virtual address.
192 * @param GCPhysFault The fault physical address.
193 * @param cbEntry The entry size.
194 */
195#ifdef IN_RING3
196DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
197#else
198DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
199#endif
200{
201#ifdef IN_GC
202 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
203
204#elif defined(IN_RING0)
205 void *pvRet;
206 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
207 AssertFatalRCSuccess(rc);
208 return pvRet;
209
210#elif defined(IN_RING3)
211 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
212#else
213# error "huh?"
214#endif
215}
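/* The masking above relies on cbEntry being a power of two; clearing the low
 * bits rounds the fault address down to the start of the entry being written.
 * For example, with cbEntry = 8 (a PAE PTE) a fault at 0x1003 yields:
 *
 *      0x1003 & ~(RTGCUINTPTR)(8 - 1)  ->  0x1000
 */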
216
217
218/**
219 * Process shadow entries before they are changed by the guest.
220 *
221 * For PT entries we will clear them. For PD entries, we'll simply check
222 * for mapping conflicts and set the SyncCR3 FF if found.
223 *
224 * @param pPool The pool.
225 * @param pPage The head page.
226 * @param GCPhysFault The guest physical fault address.
227 * @param uAddress In R0 and GC this is the guest context fault address (flat).
228 * In R3 this is the host context 'fault' address.
229 * @param pCpu The disassembler state for figuring out the write size.
230 * This need not be specified if the caller knows we won't do cross entry accesses.
231 */
232#ifdef IN_RING3
233void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
234#else
235void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
236#endif
237{
238 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
239 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
240
241 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d\n", pvAddress, GCPhysFault, pPage->enmKind));
242
243 for (;;)
244 {
245 union
246 {
247 void *pv;
248 PX86PT pPT;
249 PX86PTPAE pPTPae;
250 PX86PD pPD;
251 PX86PDPAE pPDPae;
252 } uShw;
253 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
254
255 switch (pPage->enmKind)
256 {
257 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
258 {
259 const unsigned iShw = off / sizeof(X86PTE);
260 if (uShw.pPT->a[iShw].n.u1Present)
261 {
262# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
263 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
264 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
265 pgmPoolTracDerefGCPhysHint(pPool, pPage,
266 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
267 pGstPte->u & X86_PTE_PG_MASK);
268# endif
269 uShw.pPT->a[iShw].u = 0;
270 }
271 break;
272 }
273
274 /* page/2 sized */
275 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
276 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
277 {
278 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
279 if (uShw.pPTPae->a[iShw].n.u1Present)
280 {
281# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
282 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
283 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
284 pgmPoolTracDerefGCPhysHint(pPool, pPage,
285 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
286 pGstPte->u & X86_PTE_PG_MASK);
287# endif
288 uShw.pPTPae->a[iShw].u = 0;
289 }
290 }
291 break;
292
293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
294 {
295 const unsigned iShw = off / sizeof(X86PTEPAE);
296 if (uShw.pPTPae->a[iShw].n.u1Present)
297 {
298# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
299 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
300 Log4(("pgmPoolMonitorChainChanging pae_pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
301 pgmPoolTracDerefGCPhysHint(pPool, pPage,
302 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
303 pGstPte->u & X86_PTE_PAE_PG_MASK);
304# endif
305 uShw.pPTPae->a[iShw].u = 0;
306 }
307 break;
308 }
309
310 case PGMPOOLKIND_ROOT_32BIT_PD:
311 {
312 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
313 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
314 {
315 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
316 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
317 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
318 }
319 /* paranoia / a bit assumptive. */
320 else if ( pCpu
321 && (off & 4)
322 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
323 {
324 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
325 if ( iShw2 != iShw
326 && iShw2 < ELEMENTS(uShw.pPD->a)
327 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
328 {
329 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
330 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
331 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
332 }
333 }
334#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
335 if ( uShw.pPD->a[iShw].n.u1Present
336 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
337 {
338 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
339# ifdef IN_GC /* TLB load - we're pushing things a bit... */
340 ASMProbeReadByte(pvAddress);
341# endif
342 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
343 uShw.pPD->a[iShw].u = 0;
344 }
345#endif
346 break;
347 }
348
349 case PGMPOOLKIND_ROOT_PAE_PD:
350 {
351 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
352 for (unsigned i = 0; i < 2; i++, iShw++)
353 {
354 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
355 {
356 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
357 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
358 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
359 }
360 /* paranoia / a bit assumptive. */
361 else if ( pCpu
362 && (off & 4)
363 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
364 {
365 const unsigned iShw2 = iShw + 2;
366 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
367 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
368 {
369 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
370 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
371 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
372 }
373 }
374#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
375 if ( uShw.pPDPae->a[iShw].n.u1Present
376 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
377 {
378 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
379# ifdef IN_GC /* TLB load - we're pushing things a bit... */
380 ASMProbeReadByte(pvAddress);
381# endif
382 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
383 uShw.pPDPae->a[iShw].u = 0;
384 }
385#endif
386 }
387 break;
388 }
389
390 default:
391 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
392 }
393
394 /* next */
395 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
396 return;
397 pPage = &pPool->aPages[pPage->iMonitoredNext];
398 }
399}
400
401
402# ifndef IN_RING3
403/**
404 * Checks if an access could be a fork operation in progress.
405 *
406 * Meaning that the guest is setting up the parent process for Copy-On-Write.
407 *
408 * @returns true if it's likely that we're forking, otherwise false.
409 * @param pPool The pool.
410 * @param pCpu The disassembled instruction.
411 * @param offFault The access offset.
412 */
413DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
414{
415 /*
416 * i386 linux is using btr to clear X86_PTE_RW.
417 * The functions involved are (2.6.16 source inspection):
418 * clear_bit
419 * ptep_set_wrprotect
420 * copy_one_pte
421 * copy_pte_range
422 * copy_pmd_range
423 * copy_pud_range
424 * copy_page_range
425 * dup_mmap
426 * dup_mm
427 * copy_mm
428 * copy_process
429 * do_fork
430 */
431 if ( pCpu->pCurInstr->opcode == OP_BTR
432 && !(offFault & 4)
433 /** @todo Validate that the bit index is X86_PTE_RW. */
434 )
435 {
436 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
437 return true;
438 }
439 return false;
440}
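/* Note: X86_PTE_RW is bit 1 and so lives in the low dword of an 8-byte PAE
 * entry; the !(offFault & 4) test above presumably filters out writes that
 * only touch the high dword. The bit index operand itself is not validated
 * yet, as the @todo above points out. */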
441
442
443/**
444 * Determine whether the page is likely to have been reused.
445 *
446 * @returns true if we consider the page as being reused for a different purpose.
447 * @returns false if we consider it to still be a paging page.
448 * @param pPage The page in question.
449 * @param pCpu The disassembly info for the faulting instruction.
450 * @param pvFault The fault address.
451 *
452 * @remark The REP prefix check is left to the caller because of STOSD/W.
453 */
454DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
455{
456 switch (pCpu->pCurInstr->opcode)
457 {
458 case OP_PUSH:
459 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
460 return true;
461 case OP_PUSHF:
462 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
463 return true;
464 case OP_PUSHA:
465 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
466 return true;
467 case OP_FXSAVE:
468 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
469 return true;
470 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
471 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
472 return true;
473 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
474 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
475 return true;
476 }
477 if ( (pCpu->param1.flags & USE_REG_GEN32)
478 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
479 {
480 Log4(("pgmPoolMonitorIsReused: ESP\n"));
481 return true;
482 }
483
484 //if (pPage->fCR3Mix)
485 // return false;
486 return false;
487}
488
489
490/**
491 * Flushes the page being accessed.
492 *
493 * @returns VBox status code suitable for scheduling.
494 * @param pVM The VM handle.
495 * @param pPool The pool.
496 * @param pPage The pool page (head).
497 * @param pCpu The disassembly of the write instruction.
498 * @param pRegFrame The trap register frame.
499 * @param GCPhysFault The fault address as guest physical address.
500 * @param pvFault The fault address.
501 */
502static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
503 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
504{
505 /*
506 * First, do the flushing.
507 */
508 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
509
510 /*
511 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
512 */
513 uint32_t cbWritten;
514 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
515 if (VBOX_SUCCESS(rc2))
516 pRegFrame->eip += pCpu->opsize;
517 else if (rc2 == VERR_EM_INTERPRETER)
518 {
519#ifdef IN_GC
520 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
521 {
522 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
523 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
524 rc = VINF_SUCCESS;
525 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
526 }
527 else
528#endif
529 {
530 rc = VINF_EM_RAW_EMULATE_INSTR;
531 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
532 }
533 }
534 else
535 rc = rc2;
536
537 /* See use in pgmPoolAccessHandlerSimple(). */
538 PGM_INVL_GUEST_TLBS();
539
540 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
541 return rc;
542
543}
544
545
546/**
547 * Handles the STOSD write accesses.
548 *
549 * @returns VBox status code suitable for scheduling.
550 * @param pVM The VM handle.
551 * @param pPool The pool.
552 * @param pPage The pool page (head).
553 * @param pCpu The disassembly of the write instruction.
554 * @param pRegFrame The trap register frame.
555 * @param GCPhysFault The fault address as guest physical address.
556 * @param pvFault The fault address.
557 */
558DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
559 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
560{
561 /*
562 * Increment the modification counter and insert it into the list
563 * of modified pages the first time.
564 */
565 if (!pPage->cModifications++)
566 pgmPoolMonitorModifiedInsert(pPool, pPage);
567
568 /*
569 * Execute REP STOSD.
570 *
571 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
572 * write situation, meaning that it's safe to write here.
573 */
574#ifdef IN_GC
575 uint32_t *pu32 = (uint32_t *)pvFault;
576#else
577 RTGCPTR pu32 = pvFault;
578#endif
579 while (pRegFrame->ecx)
580 {
581 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
582#ifdef IN_GC
583 *pu32++ = pRegFrame->eax;
584#else
585 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
586 pu32 += 4;
587#endif
588 GCPhysFault += 4;
589 pRegFrame->edi += 4;
590 pRegFrame->ecx--;
591 }
592 pRegFrame->eip += pCpu->opsize;
593
594 /* See use in pgmPoolAccessHandlerSimple(). */
595 PGM_INVL_GUEST_TLBS();
596
597 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
598 return VINF_SUCCESS;
599}
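/* Worked example: a guest "rep stosd" with ecx=4, edi=0x1000 and eax=0 clears
 * 16 bytes. The loop above runs four iterations, each notifying the monitor
 * chain via pgmPoolMonitorChainChanging before storing 4 bytes and advancing
 * GCPhysFault and edi by 4; eip is advanced past the instruction only once. */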
600
601
602/**
603 * Handles the simple write accesses.
604 *
605 * @returns VBox status code suitable for scheduling.
606 * @param pVM The VM handle.
607 * @param pPool The pool.
608 * @param pPage The pool page (head).
609 * @param pCpu The disassembly of the write instruction.
610 * @param pRegFrame The trap register frame.
611 * @param GCPhysFault The fault address as guest physical address.
612 * @param pvFault The fault address.
613 */
614DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
615 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
616{
617 /*
618 * Increment the modification counter and insert it into the list
619 * of modified pages the first time.
620 */
621 if (!pPage->cModifications++)
622 pgmPoolMonitorModifiedInsert(pPool, pPage);
623
624 /*
625 * Clear all the pages. ASSUMES that pvFault is readable.
626 */
627 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
628
629 /*
630 * Interpret the instruction.
631 */
632 uint32_t cb;
633 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
634 if (VBOX_SUCCESS(rc))
635 pRegFrame->eip += pCpu->opsize;
636 else if (rc == VERR_EM_INTERPRETER)
637 {
638# ifdef IN_GC
639 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
640 {
641 /* We're not able to handle this in ring-3, so fix the interpreter! */
642 /** @note Should be fine. There's no need to flush the whole thing. */
643#ifndef DEBUG_sandervl
644 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
645 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
646#endif
647 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
648 rc = pgmPoolMonitorChainFlush(pPool, pPage);
649 }
650 else
651# endif
652 {
653 rc = VINF_EM_RAW_EMULATE_INSTR;
654 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
655 }
656 }
657
658 /*
659 * Quick hack, with logging enabled we're getting stale
660 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
661 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
662 * have to be fixed to support this. But that'll have to wait till next week.
663 *
664 * An alternative is to keep track of the changed PTEs together with the
665 * GCPhys from the guest PT. This may prove expensive though.
666 *
667 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
668 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
669 */
670 PGM_INVL_GUEST_TLBS();
671
672 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
673 return rc;
674}
675
676
677/**
678 * \#PF Handler callback for PT write accesses.
679 *
680 * @returns VBox status code (appropriate for GC return).
681 * @param pVM VM Handle.
682 * @param uErrorCode CPU Error code.
683 * @param pRegFrame Trap register frame.
684 * NULL on DMA and other non CPU access.
685 * @param pvFault The fault address (cr2).
686 * @param GCPhysFault The GC physical address corresponding to pvFault.
687 * @param pvUser User argument.
688 */
689DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
690{
691 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
692 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
693 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
694 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
695
696 /*
697 * We should ALWAYS have the list head as the user parameter. This
698 * is because we use that page to record the changes.
699 */
700 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
701
702 /*
703 * Disassemble the faulting instruction.
704 */
705 DISCPUSTATE Cpu;
706 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
707 AssertRCReturn(rc, rc);
708
709 /*
710 * Check if it's worth dealing with.
711 */
712 bool fReused = false;
713 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
714 || pPage->fCR3Mix)
715 && !(fReused = pgmPoolMonitorIsReused(pPage, &Cpu, pvFault))
716 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
717 {
718 /*
719 * Simple instructions, no REP prefix.
720 */
721 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
722 {
723 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
724 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
725 return rc;
726 }
727
728 /*
729 * Windows is frequently doing small memset() operations (netio test 4k+).
730 * We have to deal with these or we'll kill the cache and performance.
731 */
732 if ( Cpu.pCurInstr->opcode == OP_STOSWD
733 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
734 && pRegFrame->ecx <= 0x20
735 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
736 && !((uintptr_t)pvFault & 3)
737 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
738 && Cpu.mode == CPUMODE_32BIT
739 && Cpu.opmode == CPUMODE_32BIT
740 && Cpu.addrmode == CPUMODE_32BIT
741 && Cpu.prefix == PREFIX_REP
742 && !pRegFrame->eflags.Bits.u1DF
743 )
744 {
745 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
746 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
747 return rc;
748 }
749
750 /* REP prefix, don't bother. */
751 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
752 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
753 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
754 }
755
756 /*
757 * Not worth it, so flush it.
758 *
759 * If we considered it to be reused, don't go back to ring-3
760 * to emulate failed instructions since we usually cannot
761 * interpret them. This may be a bit risky, in which case
762 * the reuse detection must be fixed.
763 */
764 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
765 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
766 rc = VINF_SUCCESS;
767 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
768 return rc;
769}
770
771# endif /* !IN_RING3 */
772#endif /* PGMPOOL_WITH_MONITORING */
773
774
775
776#ifdef PGMPOOL_WITH_CACHE
777/**
778 * Inserts a page into the GCPhys hash table.
779 *
780 * @param pPool The pool.
781 * @param pPage The page.
782 */
783DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
784{
785 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
786 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
787 pPage->iNext = pPool->aiHash[iHash];
788 pPool->aiHash[iHash] = pPage->idx;
789}
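/* The GCPhys hash is intrusive: pPool->aiHash[] holds the bucket heads (page
 * indexes) and colliding pages are chained through their iNext fields. A
 * minimal lookup sketch, following the same pattern as pgmPoolCacheAlloc
 * below:
 *
 *      uint16_t i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
 *      while (i != NIL_PGMPOOL_IDX && pPool->aPages[i].GCPhys != GCPhys)
 *          i = pPool->aPages[i].iNext;
 */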
790
791
792/**
793 * Removes a page from the GCPhys hash table.
794 *
795 * @param pPool The pool.
796 * @param pPage The page.
797 */
798DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
799{
800 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
801 if (pPool->aiHash[iHash] == pPage->idx)
802 pPool->aiHash[iHash] = pPage->iNext;
803 else
804 {
805 uint16_t iPrev = pPool->aiHash[iHash];
806 for (;;)
807 {
808 const int16_t i = pPool->aPages[iPrev].iNext;
809 if (i == pPage->idx)
810 {
811 pPool->aPages[iPrev].iNext = pPage->iNext;
812 break;
813 }
814 if (i == NIL_PGMPOOL_IDX)
815 {
816 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
817 break;
818 }
819 iPrev = i;
820 }
821 }
822 pPage->iNext = NIL_PGMPOOL_IDX;
823}
824
825
826/**
827 * Frees up one cache page.
828 *
829 * @returns VBox status code.
830 * @retval VINF_SUCCESS on success.
831 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
832 * @param pPool The pool.
833 * @param iUser The user index.
834 */
835static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
836{
837 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
838 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
839
840 /*
841 * Select one page from the tail of the age list.
842 */
843 uint16_t iToFree = pPool->iAgeTail;
844 if (iToFree == iUser)
845 iToFree = pPool->aPages[iToFree].iAgePrev;
846/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
847 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
848 {
849 uint16_t i = pPool->aPages[iToFree].iAgePrev;
850 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
851 {
852 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
853 continue;
854 iToFree = i;
855 break;
856 }
857 }
858*/
859 Assert(iToFree != iUser);
860 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
861
862 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
863 if (rc == VINF_SUCCESS)
864 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
865 return rc;
866}
867
868
869/**
870 * Checks if a kind mismatch is really a page being reused
871 * or if it's just a normal remapping.
872 *
873 * @returns true if reused and the cached page (enmKind1) should be flushed
874 * @returns false if not reused.
875 * @param enmKind1 The kind of the cached page.
876 * @param enmKind2 The kind of the requested page.
877 */
878static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
879{
880 switch (enmKind1)
881 {
882 /*
883 * Never reuse them. There is no remapping in non-paging mode.
884 */
885 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
886 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
887 return true;
888
889 /*
890 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
891 */
892 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
893 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
894 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
895 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
896 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
897 switch (enmKind2)
898 {
899 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
900 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
901 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
902 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
903 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
904 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
905 return true;
906 default:
907 return false;
908 }
909
910 /*
911 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
912 */
913 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
914 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
915 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
916 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
917 switch (enmKind2)
918 {
919 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
920 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
921 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
922 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
923 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
924 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
925 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
926 return true;
927 default:
928 return false;
929 }
930
931 /*
932 * These cannot be flushed, and it's common to reuse the PDs as PTs.
933 */
934 case PGMPOOLKIND_ROOT_32BIT_PD:
935 case PGMPOOLKIND_ROOT_PAE_PD:
936 case PGMPOOLKIND_ROOT_PDPT:
937 case PGMPOOLKIND_ROOT_PML4:
938 return false;
939
940 default:
941 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
942 }
943}
944
945
946/**
947 * Attempts to satisfy a pgmPoolAlloc request from the cache.
948 *
949 * @returns VBox status code.
950 * @retval VINF_PGM_CACHED_PAGE on success.
951 * @retval VERR_FILE_NOT_FOUND if not found.
952 * @param pPool The pool.
953 * @param GCPhys The GC physical address of the page we're gonna shadow.
954 * @param enmKind The kind of mapping.
955 * @param iUser The shadow page pool index of the user table.
956 * @param iUserTable The index into the user table (shadowed).
957 * @param ppPage Where to store the pointer to the page.
958 */
959static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
960{
961 /*
962 * Look up the GCPhys in the hash.
963 */
964 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
965 if (i != NIL_PGMPOOL_IDX)
966 {
967 do
968 {
969 PPGMPOOLPAGE pPage = &pPool->aPages[i];
970 if (pPage->GCPhys == GCPhys)
971 {
972 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
973 {
974 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
975 if (VBOX_SUCCESS(rc))
976 {
977 *ppPage = pPage;
978 STAM_COUNTER_INC(&pPool->StatCacheHits);
979 return VINF_PGM_CACHED_PAGE;
980 }
981 return rc;
982 }
983
984 /*
985 * The kind is different. In some cases we should now flush the page
986 * as it has been reused, but in most cases this is normal remapping
987 * of PDs as PTs or big pages using the GCPhys field in a slightly
988 * different way than the other kinds.
989 */
990 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
991 {
992 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
993 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
994 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
995 break;
996 }
997 }
998
999 /* next */
1000 i = pPage->iNext;
1001 } while (i != NIL_PGMPOOL_IDX);
1002 }
1003
1004 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1005 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1006 return VERR_FILE_NOT_FOUND;
1007}
1008
1009
1010/**
1011 * Inserts a page into the cache.
1012 *
1013 * @param pPool The pool.
1014 * @param pPage The cached page.
1015 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1016 */
1017static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1018{
1019 /*
1020 * Insert into the GCPhys hash if the page is fit for that.
1021 */
1022 Assert(!pPage->fCached);
1023 if (fCanBeCached)
1024 {
1025 pPage->fCached = true;
1026 pgmPoolHashInsert(pPool, pPage);
1027 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1028 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1029 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1030 }
1031 else
1032 {
1033 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1034 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1035 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1036 }
1037
1038 /*
1039 * Insert at the head of the age list.
1040 */
1041 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1042 pPage->iAgeNext = pPool->iAgeHead;
1043 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1044 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1045 else
1046 pPool->iAgeTail = pPage->idx;
1047 pPool->iAgeHead = pPage->idx;
1048}
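/* The age list is a doubly linked LRU list: iAgeHead points to the most
 * recently inserted (or used, see pgmPoolCacheUsed) page and iAgeTail to the
 * least recently used one, which is what pgmPoolCacheFreeOne evicts first. */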
1049
1050
1051/**
1052 * Flushes a cached page.
1053 *
1054 * @param pPool The pool.
1055 * @param pPage The cached page.
1056 */
1057static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1058{
1059 /*
1060 * Remove the page from the hash.
1061 */
1062 if (pPage->fCached)
1063 {
1064 pPage->fCached = false;
1065 pgmPoolHashRemove(pPool, pPage);
1066 }
1067 else
1068 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1069
1070 /*
1071 * Remove it from the age list.
1072 */
1073 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1074 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1075 else
1076 pPool->iAgeTail = pPage->iAgePrev;
1077 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1078 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1079 else
1080 pPool->iAgeHead = pPage->iAgeNext;
1081 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1082 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1083}
1084#endif /* PGMPOOL_WITH_CACHE */
1085
1086
1087#ifdef PGMPOOL_WITH_MONITORING
1088/**
1089 * Looks for pages sharing the monitor.
1090 *
1091 * @returns Pointer to the head page.
1092 * @returns NULL if not found.
1093 * @param pPool The Pool
1094 * @param pNewPage The page which is going to be monitored.
1095 */
1096static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1097{
1098#ifdef PGMPOOL_WITH_CACHE
1099 /*
1100 * Look up the GCPhys in the hash.
1101 */
1102 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1103 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1104 if (i == NIL_PGMPOOL_IDX)
1105 return NULL;
1106 do
1107 {
1108 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1109 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1110 && pPage != pNewPage)
1111 {
1112 switch (pPage->enmKind)
1113 {
1114 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1115 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1116 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1117 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1118 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1119 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1120 case PGMPOOLKIND_ROOT_32BIT_PD:
1121 case PGMPOOLKIND_ROOT_PAE_PD:
1122 case PGMPOOLKIND_ROOT_PDPT:
1123 case PGMPOOLKIND_ROOT_PML4:
1124 {
1125 /* find the head */
1126 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1127 {
1128 Assert(pPage->iMonitoredPrev != pPage->idx);
1129 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1130 }
1131 return pPage;
1132 }
1133
1134 /* ignore, no monitoring. */
1135 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1136 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1137 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1138 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1139 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1140 break;
1141 default:
1142 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1143 }
1144 }
1145
1146 /* next */
1147 i = pPage->iNext;
1148 } while (i != NIL_PGMPOOL_IDX);
1149#endif
1150 return NULL;
1151}
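/* Note: the unsigned comparison "pPage->GCPhys - GCPhys < PAGE_SIZE" above
 * matches any pool page whose GCPhys falls within the same guest page as the
 * page-aligned search key, because an underflow wraps to a huge value. */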
1152
1153/**
1154 * Enables write monitoring of a guest page.
1155 *
1156 * @returns VBox status code.
1157 * @retval VINF_SUCCESS on success.
1158 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1159 * @param pPool The pool.
1160 * @param pPage The cached page.
1161 */
1162static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1163{
1164 /*
1165 * Filter out the relevant kinds.
1166 */
1167 switch (pPage->enmKind)
1168 {
1169 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1170 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1171 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1172 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1173 break;
1174
1175 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1176 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1177 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1178 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1179 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1180 /* Nothing to monitor here. */
1181 return VINF_SUCCESS;
1182
1183 case PGMPOOLKIND_ROOT_32BIT_PD:
1184 case PGMPOOLKIND_ROOT_PAE_PD:
1185#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1186 break;
1187#endif
1188 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1189 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1190 case PGMPOOLKIND_ROOT_PDPT:
1191 case PGMPOOLKIND_ROOT_PML4:
1192 default:
1193 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1194 }
1195
1196 /*
1197 * Install handler.
1198 */
1199 int rc;
1200 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1201 if (pPageHead)
1202 {
1203 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1204 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1205 pPage->iMonitoredPrev = pPageHead->idx;
1206 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1207 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1208 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1209 pPageHead->iMonitoredNext = pPage->idx;
1210 rc = VINF_SUCCESS;
1211 }
1212 else
1213 {
1214 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1215 PVM pVM = pPool->CTXSUFF(pVM);
1216 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1217 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1218 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1219 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1220 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1221 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
1222 pPool->pszAccessHandler);
1223 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1224 * the heap size should suffice. */
1225 AssertFatalRC(rc);
1226 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1227 rc = VERR_PGM_POOL_CLEARED;
1228 }
1229 pPage->fMonitored = true;
1230 return rc;
1231}
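/* Only one physical access handler is registered per monitored guest page:
 * when a head page already exists for the GCPhys, the new page is merely
 * linked into its chain via iMonitoredNext/iMonitoredPrev instead of
 * installing a second handler. */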
1232
1233
1234/**
1235 * Disables write monitoring of a guest page.
1236 *
1237 * @returns VBox status code.
1238 * @retval VINF_SUCCESS on success.
1239 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1240 * @param pPool The pool.
1241 * @param pPage The cached page.
1242 */
1243static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1244{
1245 /*
1246 * Filter out the relevant kinds.
1247 */
1248 switch (pPage->enmKind)
1249 {
1250 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1251 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1252 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1253 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1254 break;
1255
1256 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1257 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1258 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1259 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1260 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1261 /* Nothing to monitor here. */
1262 return VINF_SUCCESS;
1263
1264 case PGMPOOLKIND_ROOT_32BIT_PD:
1265 case PGMPOOLKIND_ROOT_PAE_PD:
1266#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1267 break;
1268#endif
1269 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1270 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1271 case PGMPOOLKIND_ROOT_PDPT:
1272 case PGMPOOLKIND_ROOT_PML4:
1273 default:
1274 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1275 }
1276
1277 /*
1278 * Remove the page from the monitored list or uninstall it if last.
1279 */
1280 const PVM pVM = pPool->CTXSUFF(pVM);
1281 int rc;
1282 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1283 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1284 {
1285 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1286 {
1287 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1288 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1289 pNewHead->fCR3Mix = pPage->fCR3Mix;
1290 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1291 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1292 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1293 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
1294 pPool->pszAccessHandler);
1295 AssertFatalRCSuccess(rc);
1296 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1297 }
1298 else
1299 {
1300 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1301 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1302 {
1303 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1304 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1305 }
1306 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1307 rc = VINF_SUCCESS;
1308 }
1309 }
1310 else
1311 {
1312 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1313 AssertFatalRC(rc);
1314 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1315 rc = VERR_PGM_POOL_CLEARED;
1316 }
1317 pPage->fMonitored = false;
1318
1319 /*
1320 * Remove it from the list of modified pages (if in it).
1321 */
1322 pgmPoolMonitorModifiedRemove(pPool, pPage);
1323
1324 return rc;
1325}
1326
1327
1328#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1329/**
1330 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1331 *
1332 * @param pPool The Pool.
1333 * @param pPage A page in the chain.
1334 * @param fCR3Mix The new fCR3Mix value.
1335 */
1336static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1337{
1338 /* current */
1339 pPage->fCR3Mix = fCR3Mix;
1340
1341 /* before */
1342 int16_t idx = pPage->iMonitoredPrev;
1343 while (idx != NIL_PGMPOOL_IDX)
1344 {
1345 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1346 idx = pPool->aPages[idx].iMonitoredPrev;
1347 }
1348
1349 /* after */
1350 idx = pPage->iMonitoredNext;
1351 while (idx != NIL_PGMPOOL_IDX)
1352 {
1353 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1354 idx = pPool->aPages[idx].iMonitoredNext;
1355 }
1356}
1357
1358
1359/**
1360 * Installs or modifies monitoring of a CR3 page (special).
1361 *
1362 * We're pretending the CR3 page is shadowed by the pool so we can use the
1363 * generic mechanisms in detecting chained monitoring. (This also gives us a
1364 * tast of what code changes are required to really pool CR3 shadow pages.)
1365 *
1366 * @returns VBox status code.
1367 * @param pPool The pool.
1368 * @param idxRoot The CR3 (root) page index.
1369 * @param GCPhysCR3 The (new) CR3 value.
1370 */
1371int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1372{
1373 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1374 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1375 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1376 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1377
1378 /*
1379 * The unlikely case where it already matches.
1380 */
1381 if (pPage->GCPhys == GCPhysCR3)
1382 {
1383 Assert(pPage->fMonitored);
1384 return VINF_SUCCESS;
1385 }
1386
1387 /*
1388 * Flush the current monitoring and remove it from the hash.
1389 */
1390 int rc = VINF_SUCCESS;
1391 if (pPage->fMonitored)
1392 {
1393 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1394 rc = pgmPoolMonitorFlush(pPool, pPage);
1395 if (rc == VERR_PGM_POOL_CLEARED)
1396 rc = VINF_SUCCESS;
1397 else
1398 AssertFatalRC(rc);
1399 pgmPoolHashRemove(pPool, pPage);
1400 }
1401
1402 /*
1403 * Monitor the page at the new location and insert it into the hash.
1404 */
1405 pPage->GCPhys = GCPhysCR3;
1406 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1407 if (rc2 != VERR_PGM_POOL_CLEARED)
1408 {
1409 AssertFatalRC(rc2);
1410 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1411 rc = rc2;
1412 }
1413 pgmPoolHashInsert(pPool, pPage);
1414 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1415 return rc;
1416}
1417
1418
1419/**
1420 * Removes the monitoring of a CR3 page (special).
1421 *
1422 * @returns VBox status code.
1423 * @param pPool The pool.
1424 * @param idxRoot The CR3 (root) page index.
1425 */
1426int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1427{
1428 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1429 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1430 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1431 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1432
1433 if (!pPage->fMonitored)
1434 return VINF_SUCCESS;
1435
1436 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1437 int rc = pgmPoolMonitorFlush(pPool, pPage);
1438 if (rc != VERR_PGM_POOL_CLEARED)
1439 AssertFatalRC(rc);
1440 else
1441 rc = VINF_SUCCESS;
1442 pgmPoolHashRemove(pPool, pPage);
1443 Assert(!pPage->fMonitored);
1444 pPage->GCPhys = NIL_RTGCPHYS;
1445 return rc;
1446}
1447#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1448
1449
1450/**
1451 * Inserts the page into the list of modified pages.
1452 *
1453 * @param pPool The pool.
1454 * @param pPage The page.
1455 */
1456void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1457{
1458 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1459 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1460 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1461 && pPool->iModifiedHead != pPage->idx,
1462 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1463 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1464 pPool->iModifiedHead, pPool->cModifiedPages));
1465
1466 pPage->iModifiedNext = pPool->iModifiedHead;
1467 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1468 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1469 pPool->iModifiedHead = pPage->idx;
1470 pPool->cModifiedPages++;
1471#ifdef VBOX_WITH_STATISTICS
1472 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1473 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1474#endif
1475}
1476
1477
1478/**
1479 * Removes the page from the list of modified pages and resets the
1480 * modification counter.
1481 *
1482 * @param pPool The pool.
1483 * @param pPage The page which is believed to be in the list of modified pages.
1484 */
1485static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1486{
1487 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1488 if (pPool->iModifiedHead == pPage->idx)
1489 {
1490 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1491 pPool->iModifiedHead = pPage->iModifiedNext;
1492 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1493 {
1494 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1495 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1496 }
1497 pPool->cModifiedPages--;
1498 }
1499 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1500 {
1501 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1502 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1503 {
1504 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1505 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1506 }
1507 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1508 pPool->cModifiedPages--;
1509 }
1510 else
1511 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1512 pPage->cModifications = 0;
1513}
1514
1515
1516/**
1517 * Zaps the list of modified pages, resetting their modification counters in the process.
1518 *
1519 * @param pVM The VM handle.
1520 */
1521void pgmPoolMonitorModifiedClearAll(PVM pVM)
1522{
1523 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1524 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1525
1526 unsigned cPages = 0; NOREF(cPages);
1527 uint16_t idx = pPool->iModifiedHead;
1528 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1529 while (idx != NIL_PGMPOOL_IDX)
1530 {
1531 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1532 idx = pPage->iModifiedNext;
1533 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1534 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1535 pPage->cModifications = 0;
1536 Assert(++cPages);
1537 }
1538 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1539 pPool->cModifiedPages = 0;
1540}
1541
1542
1543/**
1544 * Clear all shadow pages and clear all modification counters.
1545 *
1546 * @param pVM The VM handle.
1547 * @remark Should only be used when monitoring is available, thus placed in
1548 * the PGMPOOL_WITH_MONITORING #ifdef.
1549 */
1550void pgmPoolClearAll(PVM pVM)
1551{
1552 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1553 STAM_PROFILE_START(&pPool->StatClearAll, c);
1554 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1555
1556 /*
1557 * Iterate all the pages until we've encountered all that are in use.
1558 * This is a simple but not quite optimal solution.
1559 */
1560 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1561 unsigned cLeft = pPool->cUsedPages;
1562 unsigned iPage = pPool->cCurPages;
1563 while (--iPage >= PGMPOOL_IDX_FIRST)
1564 {
1565 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1566 if (pPage->GCPhys != NIL_RTGCPHYS)
1567 {
1568 switch (pPage->enmKind)
1569 {
1570 /*
1571 * We only care about shadow page tables.
1572 */
1573 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1574 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1575 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1576 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1577 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1578 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1579 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1580 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1581 {
1582#ifdef PGMPOOL_WITH_USER_TRACKING
1583 if (pPage->cPresent)
1584#endif
1585 {
1586 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1587 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1588 ASMMemZeroPage(pvShw);
1589 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1590#ifdef PGMPOOL_WITH_USER_TRACKING
1591 pPage->cPresent = 0;
1592 pPage->iFirstPresent = ~0;
1593#endif
1594 }
1595 }
1596 /* fall thru */
1597
1598 default:
1599 Assert(!pPage->cModifications || ++cModifiedPages);
1600 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1601 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1602 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1603 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1604 pPage->cModifications = 0;
1605 break;
1606
1607 }
1608 if (!--cLeft)
1609 break;
1610 }
1611 }
1612
1613 /* sweep the special pages too. */
1614 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1615 {
1616 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1617 if (pPage->GCPhys != NIL_RTGCPHYS)
1618 {
1619 Assert(!pPage->cModifications || ++cModifiedPages);
1620 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1621 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1622 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1623 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1624 pPage->cModifications = 0;
1625 }
1626 }
1627
1628 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1629 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1630 pPool->cModifiedPages = 0;
1631
1632#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1633 /*
1634 * Clear all the GCPhys links and rebuild the phys ext free list.
1635 */
1636 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1637 pRam;
1638 pRam = CTXALLSUFF(pRam->pNext))
1639 {
1640 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1641 while (iPage-- > 0)
1642 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1643 }
1644
1645 pPool->iPhysExtFreeHead = 0;
1646 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1647 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1648 for (unsigned i = 0; i < cMaxPhysExts; i++)
1649 {
1650 paPhysExts[i].iNext = i + 1;
1651 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1652 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1653 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1654 }
1655 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1656#endif
1657
1658
1659 pPool->cPresent = 0;
1660 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1661}
1662#endif /* PGMPOOL_WITH_MONITORING */
1663
1664
1665#ifdef PGMPOOL_WITH_USER_TRACKING
1666/**
1667 * Frees up at least one user entry.
1668 *
1669 * @returns VBox status code.
1670 * @retval VINF_SUCCESS if successfully added.
1671 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1672 * @param pPool The pool.
1673 * @param iUser The user index.
1674 */
1675static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1676{
1677 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1678#ifdef PGMPOOL_WITH_CACHE
1679 /*
1680 * Just free cached pages in a braindead fashion.
1681 */
1682 /** @todo walk the age list backwards and free the first with usage. */
1683 int rc = VINF_SUCCESS;
1684 do
1685 {
1686 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1687 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1688 rc = rc2;
1689 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1690 return rc;
1691#else
1692 /*
1693 * Lazy approach.
1694 */
1695 pgmPoolFlushAllInt(pPool);
1696 return VERR_PGM_POOL_FLUSHED;
1697#endif
1698}
1699
1700
1701/**
1702 * Inserts a page into the cache.
1703 *
1704 * This will create a user node for the page, insert it into the GCPhys
1705 * hash, and insert it into the age list.
1706 *
1707 * @returns VBox status code.
1708 * @retval VINF_SUCCESS if successfully added.
1709 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1710 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1711 * @param pPool The pool.
1712 * @param pPage The cached page.
1713 * @param GCPhys The GC physical address of the page we're gonna shadow.
1714 * @param iUser The user index.
1715 * @param iUserTable The user table index.
1716 */
1717DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1718{
1719 int rc = VINF_SUCCESS;
1720 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1721
1722 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
1723
1724 /*
1725 * Find a free user node.
1726 */
1727 uint16_t i = pPool->iUserFreeHead;
1728 if (i == NIL_PGMPOOL_USER_INDEX)
1729 {
1730 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1731 if (VBOX_FAILURE(rc))
1732 return rc;
1733 i = pPool->iUserFreeHead;
1734 }
1735
1736 /*
1737 * Unlink the user node from the free list,
1738 * initialize and insert it into the user list.
1739 */
1740 pPool->iUserFreeHead = pUser[i].iNext;
1741 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1742 pUser[i].iUser = iUser;
1743 pUser[i].iUserTable = iUserTable;
1744 pPage->iUserHead = i;
1745
1746 /*
1747 * Insert into cache and enable monitoring of the guest page if enabled.
1748 *
1749 * Until we implement caching of all levels, including the CR3 one, we'll
1750 * have to make sure we don't try to monitor & cache any recursive reuse of
1751 * a monitored CR3 page. Because all Windows versions are doing this we'll
1752 * have to be able to do combined access monitoring, CR3 + PT and
1753 * PD + PT (guest PAE).
1754 *
1755 * Update:
1756 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1757 */
1758#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1759# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1760 const bool fCanBeMonitored = true;
1761# else
1762 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1763 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1764 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1765# endif
1766# ifdef PGMPOOL_WITH_CACHE
1767 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1768# endif
1769 if (fCanBeMonitored)
1770 {
1771# ifdef PGMPOOL_WITH_MONITORING
1772 rc = pgmPoolMonitorInsert(pPool, pPage);
1773 if (rc == VERR_PGM_POOL_CLEARED)
1774 {
1775 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1776# ifndef PGMPOOL_WITH_CACHE
1777 pgmPoolMonitorFlush(pPool, pPage);
1778 rc = VERR_PGM_POOL_FLUSHED;
1779# endif
1780 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1781 pUser[i].iNext = pPool->iUserFreeHead;
1782 pUser[i].iUser = NIL_PGMPOOL_IDX;
1783 pPool->iUserFreeHead = i;
1784 }
1785# endif
1786 }
1787#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
1788 return rc;
1789}
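
/**
 * A minimal standalone sketch of the index-based free list pattern used by
 * the user records above (illustrative only - DEMOUSER and NIL_DEMO_IDX are
 * simplified stand-ins for PGMPOOLUSER and NIL_PGMPOOL_USER_INDEX):
 * @code
 *  #include <stdint.h>
 *  #define NIL_DEMO_IDX UINT16_MAX
 *  typedef struct DEMOUSER
 *  {
 *      uint16_t iNext;     // next record in the free list / user chain
 *      uint16_t iUser;     // owning page index, NIL_DEMO_IDX when free
 *  } DEMOUSER;
 *
 *  // Pop a record off the free list; returns NIL_DEMO_IDX when empty.
 *  static uint16_t demoUserAlloc(DEMOUSER *paUsers, uint16_t *piFreeHead)
 *  {
 *      uint16_t i = *piFreeHead;
 *      if (i != NIL_DEMO_IDX)
 *      {
 *          *piFreeHead = paUsers[i].iNext;
 *          paUsers[i].iNext = NIL_DEMO_IDX;
 *      }
 *      return i;
 *  }
 *
 *  // Push a record back onto the head of the free list.
 *  static void demoUserFree(DEMOUSER *paUsers, uint16_t *piFreeHead, uint16_t i)
 *  {
 *      paUsers[i].iUser = NIL_DEMO_IDX;
 *      paUsers[i].iNext = *piFreeHead;
 *      *piFreeHead = i;
 *  }
 * @endcode
 */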
1790
1791
1792# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1793/**
1794 * Adds a user reference to a page.
1795 *
1796 * This will add a new user record for the page and move the page to the
1797 * head of the cache's age list (updating the replacement stats).
1798 *
1799 * @returns VBox status code.
1800 * @retval VINF_SUCCESS if successfully added.
1801 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1802 * @param pPool The pool.
1803 * @param pPage The cached page.
1804 * @param iUser The user index.
1805 * @param iUserTable The user table.
1806 */
1807static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1808{
1809 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1810
1811 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
1812# ifdef VBOX_STRICT
1813 /*
1814 * Check that the entry doesn't already exist.
1815 */
1816 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1817 {
1818 uint16_t i = pPage->iUserHead;
1819 do
1820 {
1821 Assert(i < pPool->cMaxUsers);
1822 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
1823 i = paUsers[i].iNext;
1824 } while (i != NIL_PGMPOOL_USER_INDEX);
1825 }
1826# endif
1827
1828 /*
1829 * Allocate a user node.
1830 */
1831 uint16_t i = pPool->iUserFreeHead;
1832 if (i == NIL_PGMPOOL_USER_INDEX)
1833 {
1834 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1835 if (VBOX_FAILURE(rc))
1836 return rc;
1837 i = pPool->iUserFreeHead;
1838 }
1839 pPool->iUserFreeHead = paUsers[i].iNext;
1840
1841 /*
1842 * Initialize the user node and insert it.
1843 */
1844 paUsers[i].iNext = pPage->iUserHead;
1845 paUsers[i].iUser = iUser;
1846 paUsers[i].iUserTable = iUserTable;
1847 pPage->iUserHead = i;
1848
1849# ifdef PGMPOOL_WITH_CACHE
1850 /*
1851 * Tell the cache to update its replacement stats for this page.
1852 */
1853 pgmPoolCacheUsed(pPool, pPage);
1854# endif
1855 return VINF_SUCCESS;
1856}
1857# endif /* PGMPOOL_WITH_CACHE */
1858
1859
1860/**
1861 * Frees a user record associated with a page.
1862 *
1863 * This does not clear the entry in the user table, it simply returns the
1864 * user record to the chain of free records.
1865 *
1866 * @param pPool The pool.
1867 * @param pPage The shadow page.
1868 * @param iUser The shadow page pool index of the user table.
1869 * @param iUserTable The index into the user table (shadowed).
1870 */
1871static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1872{
1873 /*
1874 * Unlink and free the specified user entry.
1875 */
1876 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1877
1878 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
1879 uint16_t i = pPage->iUserHead;
1880 if ( i != NIL_PGMPOOL_USER_INDEX
1881 && paUsers[i].iUser == iUser
1882 && paUsers[i].iUserTable == iUserTable)
1883 {
1884 pPage->iUserHead = paUsers[i].iNext;
1885
1886 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1887 paUsers[i].iNext = pPool->iUserFreeHead;
1888 pPool->iUserFreeHead = i;
1889 return;
1890 }
1891
1892 /* General: Linear search. */
1893 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1894 while (i != NIL_PGMPOOL_USER_INDEX)
1895 {
1896 if ( paUsers[i].iUser == iUser
1897 && paUsers[i].iUserTable == iUserTable)
1898 {
1899 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1900 paUsers[iPrev].iNext = paUsers[i].iNext;
1901 else
1902 pPage->iUserHead = paUsers[i].iNext;
1903
1904 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1905 paUsers[i].iNext = pPool->iUserFreeHead;
1906 pPool->iUserFreeHead = i;
1907 return;
1908 }
1909 iPrev = i;
1910 i = paUsers[i].iNext;
1911 }
1912
1913 /* Fatal: didn't find it */
1914 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1915 iUser, iUserTable, pPage->GCPhys));
1916}
1917
1918
1919/**
1920 * Gets the entry size of a shadow table.
1921 *
1922 * @param enmKind The kind of page.
1923 *
1924 * @returns The size of the entry in bytes. That is, 4 or 8.
1925 * @returns If the kind is not for a table, an assertion is raised and 0 is
1926 * returned.
1927 */
1928DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1929{
1930 switch (enmKind)
1931 {
1932 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1933 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1934 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1935 case PGMPOOLKIND_ROOT_32BIT_PD:
1936 return 4;
1937
1938 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1939 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1940 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1941 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1942 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1943 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1944 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1945 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1946 case PGMPOOLKIND_ROOT_PAE_PD:
1947 case PGMPOOLKIND_ROOT_PDPT:
1948 case PGMPOOLKIND_ROOT_PML4:
1949 return 8;
1950
1951 default:
1952 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1953 }
1954}
1955
1956
1957/**
1958 * Gets the entry size of a guest table.
1959 *
1960 * @param enmKind The kind of page.
1961 *
1962 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1963 * @returns If the kind is not for a table, an assertion is raised and 0 is
1964 * returned.
1965 */
1966DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1967{
1968 switch (enmKind)
1969 {
1970 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1971 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1972 case PGMPOOLKIND_ROOT_32BIT_PD:
1973 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1974 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1975 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1976 return 4;
1977
1978 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1979 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1980 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1981 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1982 case PGMPOOLKIND_ROOT_PAE_PD:
1983 case PGMPOOLKIND_ROOT_PDPT:
1984 case PGMPOOLKIND_ROOT_PML4:
1985 return 8;
1986
1987 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1988 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1989 /** @todo can we return 0? (nobody is calling this...) */
1990 return 0;
1991
1992 default:
1993 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1994 }
1995}
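
/**
 * The entry sizes above fix the number of entries in one 4 KB table. A tiny
 * sketch of the arithmetic (illustrative; PAGE_SIZE is 4096 here):
 * @code
 *  unsigned cEntries32Bit = PAGE_SIZE / 4;    // 1024 entries per 32-bit PT/PD
 *  unsigned cEntriesPae   = PAGE_SIZE / 8;    //  512 entries per PAE / long mode table
 * @endcode
 */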
1996
1997
1998#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1999/**
2000 * Scans one shadow page table for mappings of a physical page.
2001 *
2002 * @param pVM The VM handle.
2003 * @param pPhysPage The guest page in question.
2004 * @param iShw The shadow page table (pool index).
2005 * @param cRefs The number of references made in that PT.
2006 */
2007static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2008{
2009 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2010 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2011
2012 /*
2013 * Assert sanity.
2014 */
2015 Assert(cRefs == 1);
2016 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2017 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2018
2019 /*
2020 * Then, clear the actual mappings to the page in the shadow PT.
2021 */
2022 switch (pPage->enmKind)
2023 {
2024 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2025 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2026 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2027 {
2028 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2029 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2030 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2031 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2032 {
2033 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2034 pPT->a[i].u = 0;
2035 cRefs--;
2036 if (!cRefs)
2037 return;
2038 }
2039#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2040 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2041 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2042 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2043 {
2044 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2045 pPT->a[i].u = 0;
2046 }
2047#endif
2048 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2049 break;
2050 }
2051
2052 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2053 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2054 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2055 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2056 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2057 {
2058 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2059 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2060 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2061 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2062 {
2063 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2064 pPT->a[i].u = 0;
2065 cRefs--;
2066 if (!cRefs)
2067 return;
2068 }
2069#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2070 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2071 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2072 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2073 {
2074 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2075 pPT->a[i].u = 0;
2076 }
2077#endif
2078 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2079 break;
2080 }
2081
2082 default:
2083 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2084 }
2085}
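
/**
 * The match test above compares a PTE against the page's host address with
 * only the physical address bits and the present bit considered, so accessed
 * and dirty bits cannot cause a miss. A minimal sketch of the 32-bit case
 * (illustrative; uPte is a stand-in for pPT->a[i].u):
 * @code
 *  uint32_t const u32Match = (uint32_t)PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
 *  if ((uPte & (X86_PTE_PG_MASK | X86_PTE_P)) == u32Match)
 *      uPte = 0;   // drop the mapping regardless of A/D and protection bits
 * @endcode
 */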
2086
2087
2088/**
2089 * Scans one shadow page table for mappings of a physical page.
2090 *
2091 * @param pVM The VM handle.
2092 * @param pPhysPage The guest page in question.
2093 * @param iShw The shadow page table (pool index).
2094 * @param cRefs The number of references made in that PT.
2095 */
2096void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2097{
2098 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2099 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2100 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2101 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2102 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2103 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2104}
2105
2106
2107/**
2108 * Flushes a list of shadow page tables mapping the same physical page.
2109 *
2110 * @param pVM The VM handle.
2111 * @param pPhysPage The guest page in question.
2112 * @param iPhysExt The physical cross reference extent list to flush.
2113 */
2114void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2115{
2116 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2117 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2118 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2119
2120 const uint16_t iPhysExtStart = iPhysExt;
2121 PPGMPOOLPHYSEXT pPhysExt;
2122 do
2123 {
2124 Assert(iPhysExt < pPool->cMaxPhysExts);
2125 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2126 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2127 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2128 {
2129 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2130 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2131 }
2132
2133 /* next */
2134 iPhysExt = pPhysExt->iNext;
2135 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2136
2137 /* insert the list into the free list and clear the ram range entry. */
2138 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2139 pPool->iPhysExtFreeHead = iPhysExtStart;
2140 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2141
2142 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2143}
2144#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2145
2146
2147/**
2148 * Scans all shadow page tables for mappings of a physical page.
2149 *
2150 * This may be slow, but it's most likely more efficient than cleaning
2151 * out the entire page pool / cache.
2152 *
2153 * @returns VBox status code.
2154 * @retval VINF_SUCCESS if all references have been successfully cleared.
2155 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2156 * a page pool cleaning.
2157 *
2158 * @param pVM The VM handle.
2159 * @param pPhysPage The guest page in question.
2160 */
2161int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2162{
2163 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2164 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2165 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2166 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2167
2168#if 1
2169 /*
2170 * There is a limit to what makes sense.
2171 */
2172 if (pPool->cPresent > 1024)
2173 {
2174 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2175 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2176 return VINF_PGM_GCPHYS_ALIASED;
2177 }
2178#endif
2179
2180 /*
2181 * Iterate all the pages until we've encountered all those in use.
2182 * This is a simple but not quite optimal solution.
2183 */
2184 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2185 const uint32_t u32 = u64;
2186 unsigned cLeft = pPool->cUsedPages;
2187 unsigned iPage = pPool->cCurPages;
2188 while (--iPage >= PGMPOOL_IDX_FIRST)
2189 {
2190 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2191 if (pPage->GCPhys != NIL_RTGCPHYS)
2192 {
2193 switch (pPage->enmKind)
2194 {
2195 /*
2196 * We only care about shadow page tables.
2197 */
2198 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2199 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2200 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2201 {
2202 unsigned cPresent = pPage->cPresent;
2203 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2204 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2205 if (pPT->a[i].n.u1Present)
2206 {
2207 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2208 {
2209 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2210 pPT->a[i].u = 0;
2211 }
2212 if (!--cPresent)
2213 break;
2214 }
2215 break;
2216 }
2217
2218 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2220 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2221 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2222 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2223 {
2224 unsigned cPresent = pPage->cPresent;
2225 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2226 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2227 if (pPT->a[i].n.u1Present)
2228 {
2229 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2230 {
2231 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2232 pPT->a[i].u = 0;
2233 }
2234 if (!--cPresent)
2235 break;
2236 }
2237 break;
2238 }
2239 }
2240 if (!--cLeft)
2241 break;
2242 }
2243 }
2244
2245 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2246 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2247 return VINF_SUCCESS;
2248}
2249
2250
2251/**
2252 * Clears the user entry in a user table.
2253 *
2254 * This is used to remove all references to a page when flushing it.
2255 */
2256static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2257{
2258 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2259 Assert(pUser->iUser < pPool->cCurPages);
2260
2261 /*
2262 * Map the user page.
2263 */
2264 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2265 union
2266 {
2267 uint64_t *pau64;
2268 uint32_t *pau32;
2269 } u;
2270 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2271
2272#ifdef VBOX_STRICT
2273 /*
2274 * Some sanity checks.
2275 */
2276 switch (pUserPage->enmKind)
2277 {
2278 case PGMPOOLKIND_ROOT_32BIT_PD:
2279 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2280 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2281 break;
2282 case PGMPOOLKIND_ROOT_PAE_PD:
2283 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2284 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2285 break;
2286 case PGMPOOLKIND_ROOT_PDPT:
2287 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2288 Assert(pUser->iUserTable < 4);
2289 break;
2290 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2291 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2292 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2293 break;
2294 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2295 case PGMPOOLKIND_ROOT_PML4:
2296 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2297 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2298 break;
2299 default:
2300 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2301 break;
2302 }
2303#endif /* VBOX_STRICT */
2304
2305 /*
2306 * Clear the entry in the user page.
2307 */
2308 switch (pUserPage->enmKind)
2309 {
2310 /* 32-bit entries */
2311 case PGMPOOLKIND_ROOT_32BIT_PD:
2312 u.pau32[pUser->iUserTable] = 0;
2313 break;
2314
2315 /* 64-bit entries */
2316 case PGMPOOLKIND_ROOT_PAE_PD:
2317 case PGMPOOLKIND_ROOT_PDPT:
2318 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2319 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2320 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2321 case PGMPOOLKIND_ROOT_PML4:
2322 u.pau64[pUser->iUserTable] = 0;
2323 break;
2324
2325 default:
2326 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2327 }
2328}
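
/**
 * The union above lets one mapped page be viewed as either 32-bit or 64-bit
 * entries depending on the table kind. A minimal standalone sketch
 * (illustrative; pvTable, fIs32Bit and iEntry are made-up names):
 * @code
 *  union
 *  {
 *      uint64_t *pau64;
 *      uint32_t *pau32;
 *  } u;
 *  u.pau64 = (uint64_t *)pvTable;  // one page, two views
 *  if (fIs32Bit)
 *      u.pau32[iEntry] = 0;        // clear a 4 byte entry
 *  else
 *      u.pau64[iEntry] = 0;        // clear an 8 byte entry
 * @endcode
 */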
2329
2330
2331/**
2332 * Clears all users of a page.
2333 */
2334static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2335{
2336 /*
2337 * Free all the user records.
2338 */
2339 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2340 uint16_t i = pPage->iUserHead;
2341 while (i != NIL_PGMPOOL_USER_INDEX)
2342 {
2343 /* Clear the entry in the user table. */
2344 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2345
2346 /* Free it. */
2347 const uint16_t iNext = paUsers[i].iNext;
2348 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2349 paUsers[i].iNext = pPool->iUserFreeHead;
2350 pPool->iUserFreeHead = i;
2351
2352 /* Next. */
2353 i = iNext;
2354 }
2355 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2356}
2357
2358
2359#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2360/**
2361 * Allocates a new physical cross reference extent.
2362 *
2363 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2364 * @param pVM The VM handle.
2365 * @param piPhysExt Where to store the phys ext index.
2366 */
2367PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2368{
2369 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2370 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2371 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2372 {
2373 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2374 return NULL;
2375 }
2376 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2377 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2378 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2379 *piPhysExt = iPhysExt;
2380 return pPhysExt;
2381}
2382
2383
2384/**
2385 * Frees a physical cross reference extent.
2386 *
2387 * @param pVM The VM handle.
2388 * @param iPhysExt The extent to free.
2389 */
2390void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2391{
2392 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2393 Assert(iPhysExt < pPool->cMaxPhysExts);
2394 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2395 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2396 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2397 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2398 pPool->iPhysExtFreeHead = iPhysExt;
2399}
2400
2401
2402/**
2403 * Frees a list of physical cross reference extents.
2404 *
2405 * @param pVM The VM handle.
2406 * @param iPhysExt The head of the extent list to free.
2407 */
2408void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2409{
2410 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2411
2412 const uint16_t iPhysExtStart = iPhysExt;
2413 PPGMPOOLPHYSEXT pPhysExt;
2414 do
2415 {
2416 Assert(iPhysExt < pPool->cMaxPhysExts);
2417 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2418 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2419 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2420
2421 /* next */
2422 iPhysExt = pPhysExt->iNext;
2423 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2424
2425 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2426 pPool->iPhysExtFreeHead = iPhysExtStart;
2427}
2428
2429/**
2430 * Insert a reference into a list of physical cross reference extents.
2431 *
2432 * @returns The new ram range flags (top 16-bits).
2433 *
2434 * @param pVM The VM handle.
2435 * @param iPhysExt The physical extent index of the list head.
2436 * @param iShwPT The shadow page table index.
2437 *
2438 */
2439static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2440{
2441 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2442 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2443
2444 /* special common case. */
2445 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2446 {
2447 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2448 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2449 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2450 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2451 }
2452
2453 /* general treatment. */
2454 const uint16_t iPhysExtStart = iPhysExt;
2455 unsigned cMax = 15;
2456 for (;;)
2457 {
2458 Assert(iPhysExt < pPool->cMaxPhysExts);
2459 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2460 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2461 {
2462 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2463 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2464 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2465 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2466 }
2467 if (!--cMax)
2468 {
2469 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2470 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2471 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2472 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2473 }
 /* next */
 if (paPhysExts[iPhysExt].iNext == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
 iPhysExt = paPhysExts[iPhysExt].iNext;
2474 }
2475
2476 /* add another extent to the list. */
2477 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2478 if (!pNew)
2479 {
2480 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2481 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2482 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2483 }
2484 pNew->iNext = iPhysExtStart;
2485 pNew->aidx[0] = iShwPT;
2486 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2487 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2488}
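
/**
 * The 16-bit tracking word returned above packs an index into the low bits
 * and the reference count (or the PHYSEXT marker) into the top bits. A small
 * sketch of the packing and unpacking, using the same MM_RAM_FLAGS_*
 * definitions (illustrative only):
 * @code
 *  // pack: extent list head iPhysExt + the PHYSEXT marker
 *  uint16_t u16 = iPhysExt
 *               | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
 *  // unpack
 *  uint16_t iIdx  = u16 & MM_RAM_FLAGS_IDX_MASK;
 *  uint16_t cRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
 * @endcode
 */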
2489
2490
2491/**
2492 * Adds a reference to a guest physical page where extents are in use.
2493 *
2494 * @returns The new ram range flags (top 16-bits).
2495 *
2496 * @param pVM The VM handle.
2497 * @param u16 The ram range flags (top 16-bits).
2498 * @param iShwPT The shadow page table index.
2499 */
2500uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2501{
2502 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2503 {
2504 /*
2505 * Convert to extent list.
2506 */
2507 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2508 uint16_t iPhysExt;
2509 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2510 if (pPhysExt)
2511 {
2512 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2513 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2514 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2515 pPhysExt->aidx[1] = iShwPT;
2516 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2517 }
2518 else
2519 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2520 }
2521 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2522 {
2523 /*
2524 * Insert into the extent list.
2525 */
2526 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2527 }
2528 else
2529 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2530 return u16;
2531}
2532
2533
2534/**
2535 * Clear references to guest physical memory.
2536 *
2537 * @param pPool The pool.
2538 * @param pPage The page.
2539 * @param pPhysPage Pointer to the aPages entry in the ram range.
2540 */
2541void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2542{
2543 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2544 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2545
2546 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2547 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2548 {
2549 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2550 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2551 do
2552 {
2553 Assert(iPhysExt < pPool->cMaxPhysExts);
2554
2555 /*
2556 * Look for the shadow page and check if it's all freed.
2557 */
2558 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2559 {
2560 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2561 {
2562 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2563
2564 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2565 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2566 {
2567 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2568 return;
2569 }
2570
2571 /* we can free the node. */
2572 PVM pVM = pPool->CTXSUFF(pVM);
2573 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2574 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2575 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2576 {
2577 /* lonely node */
2578 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2579 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2580 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2581 }
2582 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2583 {
2584 /* head */
2585 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2586 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2587 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2588 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2589 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2590 }
2591 else
2592 {
2593 /* in list */
2594 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2595 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2596 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2597 }
2598 iPhysExt = iPhysExtNext;
2599 return;
2600 }
2601 }
2602
2603 /* next */
2604 iPhysExtPrev = iPhysExt;
2605 iPhysExt = paPhysExts[iPhysExt].iNext;
2606 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2607
2608 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2609 }
2610 else /* nothing to do */
2611 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2612}
2613
2614
2615
2616/**
2617 * Clear references to guest physical memory.
2618 *
2619 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2620 * is assumed to be correct, so the linear search can be skipped and we can assert
2621 * at an earlier point.
2622 *
2623 * @param pPool The pool.
2624 * @param pPage The page.
2625 * @param HCPhys The host physical address corresponding to the guest page.
2626 * @param GCPhys The guest physical address corresponding to HCPhys.
2627 */
2628static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2629{
2630 /*
2631 * Walk range list.
2632 */
2633 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2634 while (pRam)
2635 {
2636 RTGCPHYS off = GCPhys - pRam->GCPhys;
2637 if (off < pRam->cb)
2638 {
2639 /* does it match? */
2640 const unsigned iPage = off >> PAGE_SHIFT;
2641 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2642 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2643 {
2644 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2645 return;
2646 }
2647 break;
2648 }
2649 pRam = CTXALLSUFF(pRam->pNext);
2650 }
2651 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2652}
2653
2654
2655/**
2656 * Clear references to guest physical memory.
2657 *
2658 * @param pPool The pool.
2659 * @param pPage The page.
2660 * @param HCPhys The host physical address corresponding to the guest page.
2661 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2662 */
2663static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2664{
2665 /*
2666 * Walk range list.
2667 */
2668 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2669 while (pRam)
2670 {
2671 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2672 if (off < pRam->cb)
2673 {
2674 /* does it match? */
2675 const unsigned iPage = off >> PAGE_SHIFT;
2676 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2677 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2678 {
2679 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2680 return;
2681 }
2682 break;
2683 }
2684 pRam = CTXALLSUFF(pRam->pNext);
2685 }
2686
2687 /*
2688 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2689 */
2690 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2691 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2692 while (pRam)
2693 {
2694 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2695 while (iPage-- > 0)
2696 {
2697 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2698 {
2699 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2700 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2701 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2702 return;
2703 }
2704 }
2705 pRam = CTXALLSUFF(pRam->pNext);
2706 }
2707
2708 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2709}
2710
2711
2712/**
2713 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2714 *
2715 * @param pPool The pool.
2716 * @param pPage The page.
2717 * @param pShwPT The shadow page table (mapping of the page).
2718 * @param pGstPT The guest page table.
2719 */
2720DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2721{
2722 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2723 if (pShwPT->a[i].n.u1Present)
2724 {
2725 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2726 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2727 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2728 if (!--pPage->cPresent)
2729 break;
2730 }
2731}
2732
2733
2734/**
2735 * Clear references to guest physical memory in a PAE / 32-bit page table.
2736 *
2737 * @param pPool The pool.
2738 * @param pPage The page.
2739 * @param pShwPT The shadow page table (mapping of the page).
2740 * @param pGstPT The guest page table (just a half one).
2741 */
2742DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2743{
2744 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2745 if (pShwPT->a[i].n.u1Present)
2746 {
2747 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
2748 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2749 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2750 }
2751}
2752
2753
2754/**
2755 * Clear references to guest physical memory in a PAE / PAE page table.
2756 *
2757 * @param pPool The pool.
2758 * @param pPage The page.
2759 * @param pShwPT The shadow page table (mapping of the page).
2760 * @param pGstPT The guest page table.
2761 */
2762DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2763{
2764 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2765 if (pShwPT->a[i].n.u1Present)
2766 {
2767 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
2768 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2769 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2770 }
2771}
2772
2773
2774/**
2775 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2776 *
2777 * @param pPool The pool.
2778 * @param pPage The page.
2779 * @param pShwPT The shadow page table (mapping of the page).
2780 */
2781DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2782{
2783 RTGCPHYS GCPhys = pPage->GCPhys;
2784 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2785 if (pShwPT->a[i].n.u1Present)
2786 {
2787 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2788 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2789 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2790 }
2791}
2792
2793
2794/**
2795 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2796 *
2797 * @param pPool The pool.
2798 * @param pPage The page.
2799 * @param pShwPT The shadow page table (mapping of the page).
2800 */
2801DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2802{
2803 RTGCPHYS GCPhys = pPage->GCPhys;
2804 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2805 if (pShwPT->a[i].n.u1Present)
2806 {
2807 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
2808 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2809 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2810 }
2811}
2812#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2813
2814
2815/**
2816 * Clear references to shadowed pages in a PAE page directory.
2817 *
2818 * @param pPool The pool.
2819 * @param pPage The page.
2820 * @param pShwPD The shadow page directory (mapping of the page).
2821 */
2822DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2823{
2824 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2825 {
2826 if (pShwPD->a[i].n.u1Present)
2827 {
2828 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2829 if (pSubPage)
2830 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2831 else
2832 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2833 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2834 }
2835 }
2836}
2837
2838
2839/**
2840 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2841 *
2842 * @param pPool The pool.
2843 * @param pPage The page.
2844 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
2845 */
2846DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
2847{
2848 for (unsigned i = 0; i < ELEMENTS(pShwPDPT->a); i++)
2849 {
2850 if (pShwPDPT->a[i].n.u1Present)
2851 {
2852 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
2853 if (pSubPage)
2854 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2855 else
2856 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
2857 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2858 }
2859 }
2860}
2861
2862
2863/**
2864 * Clears all references made by this page.
2865 *
2866 * This includes other shadow pages and GC physical addresses.
2867 *
2868 * @param pPool The pool.
2869 * @param pPage The page.
2870 */
2871static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2872{
2873 /*
2874 * Map the shadow page and take action according to the page kind.
2875 */
2876 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2877 switch (pPage->enmKind)
2878 {
2879#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2880 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2881 {
2882 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2883 void *pvGst;
2884 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2885 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2886 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2887 break;
2888 }
2889
2890 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2891 {
2892 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2893 void *pvGst;
2894 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2895 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2896 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2897 break;
2898 }
2899
2900 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2901 {
2902 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2903 void *pvGst;
2904 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2905 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2906 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2907 break;
2908 }
2909
2910 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2911 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2912 {
2913 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2914 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2915 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2916 break;
2917 }
2918
2919 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2920 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2921 {
2922 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2923 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2924 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2925 break;
2926 }
2927
2928#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2929 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2930 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2931 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2932 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2933 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2934 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2935 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2936 break;
2937#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2938
2939 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2940 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2941 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2942 break;
2943
2944 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2945 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
2946 break;
2947
2948 default:
2949 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2950 }
2951
2952 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
2953 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2954 ASMMemZeroPage(pvShw);
2955 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2956 pPage->fZeroed = true;
2957}
2958#endif /* PGMPOOL_WITH_USER_TRACKING */
2959
2960
2961/**
2962 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2963 *
2964 * @param pPool The pool.
2965 */
2966static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2967{
2968 /*
2969 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2970 */
2971 Assert(NIL_PGMPOOL_IDX == 0);
2972 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2973 {
2974 /*
2975 * Get the page address.
2976 */
2977 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2978 union
2979 {
2980 uint64_t *pau64;
2981 uint32_t *pau32;
2982 } u;
2983 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2984
2985 /*
2986 * Mark stuff not present.
2987 */
2988 switch (pPage->enmKind)
2989 {
2990 case PGMPOOLKIND_ROOT_32BIT_PD:
2991 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2992 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2993 u.pau32[iPage] = 0;
2994 break;
2995
2996 case PGMPOOLKIND_ROOT_PAE_PD:
2997 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
2998 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2999 u.pau64[iPage] = 0;
3000 break;
3001
3002 case PGMPOOLKIND_ROOT_PML4:
3003 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3004 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
3005 u.pau64[iPage] = 0;
3006 break;
3007
3008 case PGMPOOLKIND_ROOT_PDPT:
3009 /* Not root of shadowed pages currently, ignore it. */
3010 break;
3011 }
3012 }
3013
3014 /*
3015 * Paranoia (to be removed), flag a global CR3 sync.
3016 */
3017 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3018}
3019
3020
3021/**
3022 * Flushes the entire cache.
3023 *
3024 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3025 * and execute this CR3 flush.
3026 *
3027 * @param pPool The pool.
3028 */
3029static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3030{
3031 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3032 LogFlow(("pgmPoolFlushAllInt:\n"));
3033
3034 /*
3035 * If there are no pages in the pool, there is nothing to do.
3036 */
3037 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3038 {
3039 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3040 return;
3041 }
3042
3043 /*
3044 * Nuke the free list and reinsert all pages into it.
3045 */
3046 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3047 {
3048 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3049
3050#ifdef IN_RING3
3051 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3052#endif
3053#ifdef PGMPOOL_WITH_MONITORING
3054 if (pPage->fMonitored)
3055 pgmPoolMonitorFlush(pPool, pPage);
3056 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3057 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3058 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3059 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3060 pPage->cModifications = 0;
3061#endif
3062 pPage->GCPhys = NIL_RTGCPHYS;
3063 pPage->enmKind = PGMPOOLKIND_FREE;
3064 Assert(pPage->idx == i);
3065 pPage->iNext = i + 1;
3066 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3067 pPage->fSeenNonGlobal = false;
3068 pPage->fMonitored = false;
3069 pPage->fCached = false;
3070 pPage->fReusedFlushPending = false;
3071 pPage->fCR3Mix = false;
3072#ifdef PGMPOOL_WITH_USER_TRACKING
3073 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3074#endif
3075#ifdef PGMPOOL_WITH_CACHE
3076 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3077 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3078#endif
3079 }
3080 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3081 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3082 pPool->cUsedPages = 0;
3083
3084#ifdef PGMPOOL_WITH_USER_TRACKING
3085 /*
3086 * Zap and reinitialize the user records.
3087 */
3088 pPool->cPresent = 0;
3089 pPool->iUserFreeHead = 0;
3090 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3091 const unsigned cMaxUsers = pPool->cMaxUsers;
3092 for (unsigned i = 0; i < cMaxUsers; i++)
3093 {
3094 paUsers[i].iNext = i + 1;
3095 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3096 paUsers[i].iUserTable = 0xfffe;
3097 }
3098 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3099#endif
3100
3101#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3102 /*
3103 * Clear all the GCPhys links and rebuild the phys ext free list.
3104 */
3105 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3106 pRam;
3107 pRam = CTXALLSUFF(pRam->pNext))
3108 {
3109 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3110 while (iPage-- > 0)
3111 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3112 }
3113
3114 pPool->iPhysExtFreeHead = 0;
3115 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3116 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3117 for (unsigned i = 0; i < cMaxPhysExts; i++)
3118 {
3119 paPhysExts[i].iNext = i + 1;
3120 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3121 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3122 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3123 }
3124 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3125#endif
3126
3127#ifdef PGMPOOL_WITH_MONITORING
3128 /*
3129 * Just zap the modified list.
3130 */
3131 pPool->cModifiedPages = 0;
3132 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3133#endif
3134
3135#ifdef PGMPOOL_WITH_CACHE
3136 /*
3137 * Clear the GCPhys hash and the age list.
3138 */
3139 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3140 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3141 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3142 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3143#endif
3144
3145 /*
3146 * Flush all the special root pages.
3147 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3148 */
3149 pgmPoolFlushAllSpecialRoots(pPool);
3150 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3151 {
3152 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3153 pPage->iNext = NIL_PGMPOOL_IDX;
3154#ifdef PGMPOOL_WITH_MONITORING
3155 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3156 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3157 pPage->cModifications = 0;
3158 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3159 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3160 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3161 if (pPage->fMonitored)
3162 {
3163 PVM pVM = pPool->CTXSUFF(pVM);
3164 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3165 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3166 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3167 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3168 pPool->pszAccessHandler);
3169 AssertFatalRCSuccess(rc);
3170# ifdef PGMPOOL_WITH_CACHE
3171 pgmPoolHashInsert(pPool, pPage);
3172# endif
3173 }
3174#endif
3175#ifdef PGMPOOL_WITH_USER_TRACKING
3176 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3177#endif
3178#ifdef PGMPOOL_WITH_CACHE
3179 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3180 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3181#endif
3182 }
3183
3184 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3185}
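
/**
 * Rebuilding a free list after a flush follows one pattern for pages, user
 * records and phys exts alike: chain every entry to its successor and
 * terminate the last one. A minimal sketch (illustrative; paEntries,
 * cEntries and NIL_IDX are made-up names):
 * @code
 *  for (unsigned i = 0; i < cEntries; i++)
 *      paEntries[i].iNext = i + 1;             // each entry points at the next
 *  paEntries[cEntries - 1].iNext = NIL_IDX;    // terminate the chain
 *  iFreeHead = 0;                              // everything is free again
 * @endcode
 */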
3186
3187
3188/**
3189 * Flushes a pool page.
3190 *
3191 * This moves the page to the free list after removing all user references to it.
3192 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3193 *
3194 * @returns VBox status code.
3195 * @retval VINF_SUCCESS on success.
3196 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3197 * @param pPool The pool.
3198 * @param pPage The shadow page to flush.
3199 */
3200int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3201{
3202 int rc = VINF_SUCCESS;
3203 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3204 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3205 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3206
3207 /*
3208 * Quietly reject any attempts at flushing any of the special root pages.
3209 */
3210 if (pPage->idx < PGMPOOL_IDX_FIRST)
3211 {
3212 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3213 return VINF_SUCCESS;
3214 }
3215
3216 /*
3217 * Mark the page as being in need of an ASMMemZeroPage().
3218 */
3219 pPage->fZeroed = false;
3220
3221#ifdef PGMPOOL_WITH_USER_TRACKING
3222 /*
3223 * Clear the page.
3224 */
3225 pgmPoolTrackClearPageUsers(pPool, pPage);
3226 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3227 pgmPoolTrackDeref(pPool, pPage);
3228 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3229#endif
3230
3231#ifdef PGMPOOL_WITH_CACHE
3232 /*
3233 * Flush it from the cache.
3234 */
3235 pgmPoolCacheFlushPage(pPool, pPage);
3236#endif /* PGMPOOL_WITH_CACHE */
3237
3238#ifdef PGMPOOL_WITH_MONITORING
3239 /*
3240 * Deregister the monitoring.
3241 */
3242 if (pPage->fMonitored)
3243 rc = pgmPoolMonitorFlush(pPool, pPage);
3244#endif
3245
3246 /*
3247 * Free the page.
3248 */
3249 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3250 pPage->iNext = pPool->iFreeHead;
3251 pPool->iFreeHead = pPage->idx;
3252 pPage->enmKind = PGMPOOLKIND_FREE;
3253 pPage->GCPhys = NIL_RTGCPHYS;
3254 pPage->fReusedFlushPending = false;
3255
3256 pPool->cUsedPages--;
3257 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3258 return rc;
3259}
3260
3261
3262/**
3263 * Frees a usage of a pool page.
3264 *
3265 * The caller is responsible for updating the user table so that it no longer
3266 * references the shadow page.
3267 *
3268 * @param pPool The pool.
3269 * @param pPage The shadow page.
3270 * @param iUser The shadow page pool index of the user table.
3271 * @param iUserTable The index into the user table (shadowed).
3272 */
3273void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3274{
3275 STAM_PROFILE_START(&pPool->StatFree, a);
3276 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3277 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3278 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3279#ifdef PGMPOOL_WITH_USER_TRACKING
3280 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3281#endif
3282#ifdef PGMPOOL_WITH_CACHE
3283 if (!pPage->fCached)
3284#endif
3285 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3286 STAM_PROFILE_STOP(&pPool->StatFree, a);
3287}
3288
3289
3290/**
3291 * Makes sure at least one free page is available, growing the pool if possible.
3292 *
3293 * @returns VBox status code.
3294 * @retval VINF_SUCCESS on success.
3295 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3296 *
3297 * @param pPool The pool.
3298 * @param iUser The user of the page.
3299 */
3300static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3301{
3302 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3303
3304 /*
3305 * If the pool isn't full grown yet, expand it.
3306 */
3307 if (pPool->cCurPages < pPool->cMaxPages)
3308 {
3309 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3310#ifdef IN_RING3
3311 int rc = PGMR3PoolGrow(pPool->pVMHC);
3312#else
3313 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3314#endif
3315 if (VBOX_FAILURE(rc))
3316 return rc;
3317 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3318 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3319 return VINF_SUCCESS;
3320 }
3321
3322#ifdef PGMPOOL_WITH_CACHE
3323 /*
3324 * Free one cached page.
3325 */
3326 return pgmPoolCacheFreeOne(pPool, iUser);
3327#else
3328 /*
3329 * Flush the pool.
3330 * If we have tracking enabled, it should be possible to come up with
3331 * a cheap replacement strategy...
3332 */
3333 pgmPoolFlushAllInt(pPool);
3334 return VERR_PGM_POOL_FLUSHED;
3335#endif
3336}
3337
3338
3339/**
3340 * Allocates a page from the pool.
3341 *
3342 * This page may actually be a cached page and not in need of any processing
3343 * on the callers part.
3344 *
3345 * @returns VBox status code.
3346 * @retval VINF_SUCCESS if a NEW page was allocated.
3347 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3348 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3349 * @param pVM The VM handle.
3350 * @param GCPhys The GC physical address of the page we're gonna shadow.
3351 * For 4MB and 2MB PD entries, it's the first address the
3352 * shadow PT is covering.
3353 * @param enmKind The kind of mapping.
3354 * @param iUser The shadow page pool index of the user table.
3355 * @param iUserTable The index into the user table (shadowed).
3356 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3357 */
3358int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3359{
3360 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3361 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3362 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3363
3364 *ppPage = NULL;
3365
3366#ifdef PGMPOOL_WITH_CACHE
3367 if (pPool->fCacheEnabled)
3368 {
3369 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3370 if (VBOX_SUCCESS(rc2))
3371 {
3372 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3373 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3374 return rc2;
3375 }
3376 }
3377#endif
3378
3379 /*
3380 * Allocate a new one.
3381 */
3382 int rc = VINF_SUCCESS;
3383 uint16_t iNew = pPool->iFreeHead;
3384 if (iNew == NIL_PGMPOOL_IDX)
3385 {
3386 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3387 if (VBOX_FAILURE(rc))
3388 {
3389 if (rc != VERR_PGM_POOL_CLEARED)
3390 {
3391 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3392 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3393 return rc;
3394 }
3395 rc = VERR_PGM_POOL_FLUSHED;
3396 }
3397 iNew = pPool->iFreeHead;
3398 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3399 }
3400
3401 /* unlink the free head */
3402 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3403 pPool->iFreeHead = pPage->iNext;
3404 pPage->iNext = NIL_PGMPOOL_IDX;
3405
3406 /*
3407 * Initialize it.
3408 */
3409 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3410 pPage->enmKind = enmKind;
3411 pPage->GCPhys = GCPhys;
3412 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3413 pPage->fMonitored = false;
3414 pPage->fCached = false;
3415 pPage->fReusedFlushPending = false;
3416 pPage->fCR3Mix = false;
3417#ifdef PGMPOOL_WITH_MONITORING
3418 pPage->cModifications = 0;
3419 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3420 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3421#endif
3422#ifdef PGMPOOL_WITH_USER_TRACKING
3423 pPage->cPresent = 0;
3424 pPage->iFirstPresent = ~0;
3425
3426 /*
3427 * Insert into the tracking and cache. If this fails, free the page.
3428 */
3429 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3430 if (VBOX_FAILURE(rc3))
3431 {
3432 if (rc3 != VERR_PGM_POOL_CLEARED)
3433 {
3434 pPool->cUsedPages--;
3435 pPage->enmKind = PGMPOOLKIND_FREE;
3436 pPage->GCPhys = NIL_RTGCPHYS;
3437 pPage->iNext = pPool->iFreeHead;
3438 pPool->iFreeHead = pPage->idx;
3439 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3440 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3441 return rc3;
3442 }
3443 rc = VERR_PGM_POOL_FLUSHED;
3444 }
3445#endif /* PGMPOOL_WITH_USER_TRACKING */
3446
3447 /*
3448 * Commit the allocation, clear the page and return.
3449 */
3450#ifdef VBOX_WITH_STATISTICS
3451 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3452 pPool->cUsedPagesHigh = pPool->cUsedPages;
3453#endif
3454
3455 if (!pPage->fZeroed)
3456 {
3457 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3458 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3459 ASMMemZeroPage(pv);
3460 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3461 }
3462
3463 *ppPage = pPage;
3464 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3465 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3466 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3467 return rc;
3468}
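
/**
 * A sketch of how a caller is expected to use the allocator above
 * (illustrative; iUserIdx and iEntry are made-up names, error handling
 * trimmed down):
 * @code
 *  PPGMPOOLPAGE pPage;
 *  int rc = pgmPoolAlloc(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iUserIdx, iEntry, &pPage);
 *  if (rc == VINF_PGM_CACHED_PAGE)
 *      return VINF_SUCCESS;    // cache hit - the shadow PT is already populated
 *  if (VBOX_FAILURE(rc))
 *      return rc;              // VERR_PGM_POOL_FLUSHED etc. must be propagated
 *  // VINF_SUCCESS: pPage points to a fresh, zeroed page - populate it now
 * @endcode
 */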
3469
3470
3471/**
3472 * Frees a usage of a pool page.
3473 *
3474 * @param pVM The VM handle.
3475 * @param HCPhys The HC physical address of the shadow page.
3476 * @param iUser The shadow page pool index of the user table.
3477 * @param iUserTable The index into the user table (shadowed).
3478 */
3479void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3480{
3481 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3482 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3483 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3484}
3485
3486
3487/**
3488 * Gets an in-use page in the pool by its physical address.
3489 *
3490 * @returns Pointer to the page.
3491 * @param pVM The VM handle.
3492 * @param HCPhys The HC physical address of the shadow page.
3493 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3494 */
3495PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3496{
3497 /** @todo profile this! */
3498 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3499 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3500 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3501 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3502 return pPage;
3503}
3504
3505
3506/**
3507 * Flushes the entire cache.
3508 *
3509 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3510 * and will execute the CR3 flush.
3511 *
3512 * @param pPool The pool.
3513 */
3514void pgmPoolFlushAll(PVM pVM)
3515{
3516 LogFlow(("pgmPoolFlushAll:\n"));
3517 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3518}
3519