source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@7676

Last change on this file since 7676 was 7676, checked in by vboxsync, 17 years ago

Cleaned up.
AMD64 shadow paging is only valid with AMD64 guest paging. Other combinations removed.
Simplified paging #ifdefs.

1/* $Id: PGMAllBth.h 7676 2008-04-01 09:18:10Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 innotek GmbH
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 */
19
20/*******************************************************************************
21* Internal Functions *
22*******************************************************************************/
23__BEGIN_DECLS
24PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
25PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
26PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
27PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
28PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage);
29PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
30PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
31PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint32_t cr0, uint32_t cr3, uint32_t cr4, bool fGlobal);
32#ifdef VBOX_STRICT
33PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint32_t cr3, uint32_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
34#endif
35#ifdef PGMPOOL_WITH_USER_TRACKING
36DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
37#endif
38__END_DECLS
39
40
41/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
42#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE
43#error "Invalid combination; PAE guest implies PAE shadow"
44#endif
45
46#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
47 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE)
48#error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE)
53#error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64) \
57 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64)
58#error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
59#endif
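/* Summary of the combinations that survive the checks above (derived from the
 * #error conditions; a sketch, not separate documentation):
 *      real/protected mode guest (no paging)  ->  32-bit or PAE shadow
 *      32-bit guest paging                    ->  32-bit or PAE shadow
 *      PAE guest paging                       ->  PAE shadow
 *      AMD64 guest paging                     ->  AMD64 shadow
 */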
60
61/**
62 * #PF Handler for raw-mode guest execution.
63 *
64 * @returns VBox status code (appropriate for trap handling and GC return).
65 * @param pVM VM Handle.
66 * @param uErr The trap error code.
67 * @param pRegFrame Trap register frame.
68 * @param pvFault The fault address.
69 */
70PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
71{
72#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) && PGM_SHW_TYPE != PGM_TYPE_AMD64
73
74# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
75 /*
76 * Hide the instruction fetch trap indicator for now.
77 */
78 /** @todo NXE will change this and we must fix NXE in the switcher too! */
79 if (uErr & X86_TRAP_PF_ID)
80 {
81 uErr &= ~X86_TRAP_PF_ID;
82 TRPMSetErrorCode(pVM, uErr);
83 }
84# endif
85
86 /*
87 * Get PDs.
88 */
89 int rc;
90# if PGM_WITH_PAGING(PGM_GST_TYPE)
91# if PGM_GST_TYPE == PGM_TYPE_32BIT
92 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
93 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
94# else /* PAE */
95 unsigned iPDSrc;
96 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, (RTGCUINTPTR)pvFault, &iPDSrc);
97# endif
98# else
99 PGSTPD pPDSrc = NULL;
100 const unsigned iPDSrc = 0;
101# endif
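/* Rough illustration of the index math used here, assuming the usual 32-bit
 * definitions (GST_PD_SHIFT == 22, i.e. one PDE per 4MB): a fault at
 * 0xC0101234 gives iPDSrc = 0xC0101234 >> 22 = 0x300, the entry covering
 * 0xC0000000..0xC03FFFFF. The PAE path is the same idea with 2MB entries
 * plus a PDPT lookup done by pgmGstGetPaePDPtr. */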
102
103 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
104# if PGM_SHW_TYPE == PGM_TYPE_32BIT
105 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
106# else /* PAE */
107 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries. */
108# endif
109
110# if PGM_WITH_PAGING(PGM_GST_TYPE)
111# ifdef PGM_SYNC_DIRTY_BIT
112 /*
113 * If we successfully correct the write protection fault due to dirty bit
114 * tracking, or this page fault is a genuine one, then return immediately.
115 */
116 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
117 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
118 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
119 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
120 || rc == VINF_EM_RAW_GUEST_TRAP)
121 {
122 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
123 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
124 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
125 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
126 }
127# endif
128
129 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
130# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
131
132 /*
133 * A common case is the not-present error caused by lazy page table syncing.
134 *
135 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
136 * so we can safely assume that the shadow PT is present when calling SyncPage later.
137 *
138 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
139 * of mapping conflict and defer to SyncCR3 in R3.
140 * (Again, we do NOT support access handlers for non-present guest pages.)
141 *
142 */
143# if PGM_WITH_PAGING(PGM_GST_TYPE)
144 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
145# else
146 GSTPDE PdeSrc;
147 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
148 PdeSrc.n.u1Present = 1;
149 PdeSrc.n.u1Write = 1;
150 PdeSrc.n.u1Accessed = 1;
151 PdeSrc.n.u1User = 1;
152# endif
153 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
154 && !pPDDst->a[iPDDst].n.u1Present
155 && PdeSrc.n.u1Present
156 )
157
158 {
159 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
160 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
161 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
162 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
163 if (VBOX_SUCCESS(rc))
164 {
165 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
166 return rc;
167 }
168 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
169 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
170 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
171 return VINF_PGM_SYNC_CR3;
172 }
173
174# if PGM_WITH_PAGING(PGM_GST_TYPE)
175 /*
176 * Check if this address is within any of our mappings.
177 *
178 * This is *very* fast and it's gonna save us a bit of effort below and prevent
179 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
180 * (BTW, it's impossible to have physical access handlers in a mapping.)
181 */
182 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
183 {
184 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
185 PPGMMAPPING pMapping = CTXALLSUFF(pVM->pgm.s.pMappings);
186 for ( ; pMapping; pMapping = CTXALLSUFF(pMapping->pNext))
187 {
188 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
189 break;
190 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
191 {
192 /*
193 * The first thing we check is if we've got an undetected conflict.
194 */
195 if (!pVM->pgm.s.fMappingsFixed)
196 {
197 unsigned iPT = pMapping->cPTs;
198 while (iPT-- > 0)
199 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
200 {
201 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
202 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
203 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
204 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
205 return VINF_PGM_SYNC_CR3;
206 }
207 }
208
209 /*
210 * Check if the fault address is in a virtual page access handler range.
211 */
212 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->HyperVirtHandlers, pvFault);
213 if ( pCur
214 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
215 && uErr & X86_TRAP_PF_RW)
216 {
217# ifdef IN_GC
218 STAM_PROFILE_START(&pCur->Stat, h);
219 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
220 STAM_PROFILE_STOP(&pCur->Stat, h);
221# else
222 AssertFailed();
223 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
224# endif
225 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
226 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
227 return rc;
228 }
229
230 /*
231 * Pretend we're not here and let the guest handle the trap.
232 */
233 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
234 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
235 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
236 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
237 return VINF_EM_RAW_GUEST_TRAP;
238 }
239 }
240 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
241 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
242# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
243
244 /*
245 * Check if this fault address is flagged for special treatment,
246 * which means we'll have to figure out the physical address and
247 * check flags associated with it.
248 *
249 * ASSUME that we can limit any special access handling to pages
250 * in page tables which the guest believes to be present.
251 */
252 if (PdeSrc.n.u1Present)
253 {
254 RTGCPHYS GCPhys = NIL_RTGCPHYS;
255
256# if PGM_WITH_PAGING(PGM_GST_TYPE)
257 uint32_t cr4 = CPUMGetGuestCR4(pVM);
258 if ( PdeSrc.b.u1Size
259 && (cr4 & X86_CR4_PSE))
260 GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK)
261 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
262 else
263 {
264 PX86PT pPTSrc;
265# ifdef IN_GC
266 rc = PGMGCDynMapGCPage(pVM, PdeSrc.u & GST_PDE_PG_MASK, (void **)&pPTSrc);
267# else
268 pPTSrc = (PX86PT)MMPhysGCPhys2HCVirt(pVM, PdeSrc.u & GST_PDE_PG_MASK, sizeof(*pPTSrc));
269 if (pPTSrc == 0)
270 rc = VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS;
271# endif
272 if (VBOX_SUCCESS(rc))
273 {
274 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> PAGE_SHIFT) & GST_PT_MASK;
275 if (pPTSrc->a[iPTESrc].n.u1Present)
276 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
277 }
278 }
279# else
280 /* No paging so the fault address is the physical address */
281 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
282# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
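/* Worked example of the big-page calculation above for a 32-bit guest,
 * assuming GST_PDE_BIG_PG_MASK == 0xffc00000 and PAGE_OFFSET_MASK == 0xfff:
 * the PDE supplies bits 31:22 and pvFault supplies bits 21:12, so a PDE base
 * of 0x10000000 with a fault at 0xC01ABCDE yields GCPhys = 0x101AB000
 * (page aligned; the byte offset is added later where needed). */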
283
284 /*
285 * If we have a GC address we'll check if it has any flags set.
286 */
287 if (GCPhys != NIL_RTGCPHYS)
288 {
289 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
290
291 PPGMPAGE pPage;
292 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
293 if (VBOX_SUCCESS(rc))
294 {
295 if (PGM_PAGE_HAS_ANY_HANDLERS(pPage))
296 {
297 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
298 {
299 /*
300 * Physical page access handler.
301 */
302 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
303 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
304 if (pCur)
305 {
306# ifdef PGM_SYNC_N_PAGES
307 /*
308 * If the region is write protected and we got a page not present fault, then sync
309 * the pages. If the fault was caused by a read, then restart the instruction.
310 * In case of write access continue to the GC write handler.
311 *
312 * ASSUMES that there is only one handler per page or that they have similar write properties.
313 */
314 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
315 && !(uErr & X86_TRAP_PF_P))
316 {
317 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
318 if ( VBOX_FAILURE(rc)
319 || !(uErr & X86_TRAP_PF_RW)
320 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
321 {
322 AssertRC(rc);
323 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
324 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
325 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
326 return rc;
327 }
328 }
329# endif
330
331 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
332 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
333 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
334
335#if defined(IN_GC) || defined(IN_RING0)
336 if (CTXALLSUFF(pCur->pfnHandler))
337 {
338 STAM_PROFILE_START(&pCur->Stat, h);
339 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
340 STAM_PROFILE_STOP(&pCur->Stat, h);
341 }
342 else
343#endif
344 rc = VINF_EM_RAW_EMULATE_INSTR;
345 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
346 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
347 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
348 return rc;
349 }
350 }
351# if PGM_WITH_PAGING(PGM_GST_TYPE)
352 else
353 {
354# ifdef PGM_SYNC_N_PAGES
355 /*
356 * If the region is write protected and we got a page not present fault, then sync
357 * the pages. If the fault was caused by a read, then restart the instruction.
358 * In case of write access continue to the GC write handler.
359 */
360 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
361 && !(uErr & X86_TRAP_PF_P))
362 {
363 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
364 if ( VBOX_FAILURE(rc)
365 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
366 || !(uErr & X86_TRAP_PF_RW))
367 {
368 AssertRC(rc);
369 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
370 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
371 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
372 return rc;
373 }
374 }
375# endif
376 /*
377 * Ok, it's a virtual page access handler.
378 *
379 * Since it's faster to search by address, we'll do that first
380 * and then retry by GCPhys if that fails.
381 */
382 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
383 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
384 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
385 */
386 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
387 if (pCur)
388 {
389 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
390 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
391 || !(uErr & X86_TRAP_PF_P)
392 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
393 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
394
395 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
396 && ( uErr & X86_TRAP_PF_RW
397 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
398 {
399# ifdef IN_GC
400 STAM_PROFILE_START(&pCur->Stat, h);
401 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
402 STAM_PROFILE_STOP(&pCur->Stat, h);
403# else
404 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
405# endif
406 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
407 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
408 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
409 return rc;
410 }
411 /* Unhandled part of a monitored page */
412 }
413 else
414 {
415 /* Check by physical address. */
416 PPGMVIRTHANDLER pCur;
417 unsigned iPage;
418 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
419 &pCur, &iPage);
420 Assert(VBOX_SUCCESS(rc) || !pCur);
421 if ( pCur
422 && ( uErr & X86_TRAP_PF_RW
423 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
424 {
425 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
426# ifdef IN_GC
427 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
428 Assert(off < pCur->cb);
429 STAM_PROFILE_START(&pCur->Stat, h);
430 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
431 STAM_PROFILE_STOP(&pCur->Stat, h);
432# else
433 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
434# endif
435 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
436 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
437 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
438 return rc;
439 }
440 }
441 }
442# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
443
444 /*
445 * There is a handled area of the page, but this fault doesn't belong to it.
446 * We must emulate the instruction.
447 *
448 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
449 * we first check if this was a page-not-present fault for a page with only
450 * write access handlers. Restart the instruction if it wasn't a write access.
451 */
452 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
453
454 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
455 && !(uErr & X86_TRAP_PF_P))
456 {
457 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
458 if ( VBOX_FAILURE(rc)
459 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
460 || !(uErr & X86_TRAP_PF_RW))
461 {
462 AssertRC(rc);
463 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
464 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
465 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
466 return rc;
467 }
468 }
469
470 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
471 * It's writing to an unhandled part of the LDT page several million times.
472 */
473 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
474 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%RHp%s%s\n",
475 rc, pPage->HCPhys,
476 PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage) ? " phys" : "",
477 PGM_PAGE_HAS_ANY_VIRTUAL_HANDLERS(pPage) ? " virt" : ""));
478 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
479 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
480 return rc;
481 } /* if any kind of handler */
482
483# if PGM_WITH_PAGING(PGM_GST_TYPE)
484 if (uErr & X86_TRAP_PF_P)
485 {
486 /*
487 * The page isn't marked, but it might still be monitored by a virtual page access handler.
488 * (ASSUMES no temporary disabling of virtual handlers.)
489 */
490 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
491 * we should correct both the shadow page table and physical memory flags, and not only check for
492 * accesses within the handler region but for access to pages with virtual handlers. */
493 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
494 if (pCur)
495 {
496 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
497 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
498 || !(uErr & X86_TRAP_PF_P)
499 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
500 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
501
502 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
503 && ( uErr & X86_TRAP_PF_RW
504 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
505 {
506# ifdef IN_GC
507 STAM_PROFILE_START(&pCur->Stat, h);
508 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
509 STAM_PROFILE_STOP(&pCur->Stat, h);
510# else
511 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
512# endif
513 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
514 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
515 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
516 return rc;
517 }
518 }
519 }
520# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
521 }
522 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
523
524# ifdef PGM_OUT_OF_SYNC_IN_GC
525 /*
526 * We are here only if the page is present in the guest page tables and the trap
527 * is not handled by our handlers.
528 * Check for a page out-of-sync situation.
529 */
530 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
531
532 if (!(uErr & X86_TRAP_PF_P))
533 {
534 /*
535 * Page is not present in our page tables.
536 * Try to sync it!
537 * BTW, fPageShw is invalid in this branch!
538 */
539 if (uErr & X86_TRAP_PF_US)
540 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
541 else /* supervisor */
542 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
543
544# if defined(LOG_ENABLED) && !defined(IN_RING0)
545 RTGCPHYS GCPhys;
546 uint64_t fPageGst;
547 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
548 Log(("Page out of sync: %p eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
549 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)));
550# endif /* LOG_ENABLED */
551
552# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
553 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
554 {
555 uint64_t fPageGst;
556 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
557 if ( VBOX_SUCCESS(rc)
558 && !(fPageGst & X86_PTE_US))
559 {
560 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
561 if ( pvFault == (RTGCPTR)pRegFrame->eip
562 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
563# ifdef CSAM_DETECT_NEW_CODE_PAGES
564 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
565 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)) /* any new code we encounter here */
566# endif /* CSAM_DETECT_NEW_CODE_PAGES */
567 )
568 {
569 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
570 rc = CSAMExecFault(pVM, (RTGCPTR)pRegFrame->eip);
571 if (rc != VINF_SUCCESS)
572 {
573 /*
574 * CSAM needs to perform a job in ring 3.
575 *
576 * Sync the page before going to the host context; otherwise we'll end up in a loop if
577 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
578 */
579 LogFlow(("CSAM ring 3 job\n"));
580 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
581 AssertRC(rc2);
582
583 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
584 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
585 return rc;
586 }
587 }
588# ifdef CSAM_DETECT_NEW_CODE_PAGES
589 else
590 if ( uErr == X86_TRAP_PF_RW
591 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
592 && pRegFrame->ecx < 0x10000
593 )
594 {
595 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
596 * to detect loading of new code pages.
597 */
598
599 /*
600 * Decode the instruction.
601 */
602 RTGCPTR PC;
603 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
604 if (rc == VINF_SUCCESS)
605 {
606 DISCPUSTATE Cpu;
607 uint32_t cbOp;
608 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
609
610 /* For now we'll restrict this to rep movsw/d instructions */
611 if ( rc == VINF_SUCCESS
612 && Cpu.pCurInstr->opcode == OP_MOVSWD
613 && (Cpu.prefix & PREFIX_REP))
614 {
615 CSAMMarkPossibleCodePage(pVM, pvFault);
616 }
617 }
618 }
619# endif /* CSAM_DETECT_NEW_CODE_PAGES */
620
621 /*
622 * Mark this page as safe.
623 */
624 /** @todo not correct for pages that contain both code and data!! */
625 Log2(("CSAMMarkPage %p; scanned=%d\n", pvFault, true));
626 CSAMMarkPage(pVM, pvFault, true);
627 }
628 }
629# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0) */
630 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
631 if (VBOX_SUCCESS(rc))
632 {
633 /* The page was successfully synced, return to the guest. */
634 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
635 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
636 return VINF_SUCCESS;
637 }
638 }
639 else
640 {
641 /*
642 * A side effect of not flushing global PDEs is out-of-sync pages due
643 * to physically monitored regions that are no longer valid.
644 * Assume for now that it only applies to the read/write flag.
645 */
646 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
647 {
648 if (uErr & X86_TRAP_PF_US)
649 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
650 else /* supervisor */
651 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
652
653
654 /*
655 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
656 */
657 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
658 if (VBOX_SUCCESS(rc))
659 {
660 /*
661 * Page was successfully synced, return to guest.
662 */
663# ifdef VBOX_STRICT
664 RTGCPHYS GCPhys;
665 uint64_t fPageGst;
666 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
667 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
668 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
669
670 uint64_t fPageShw;
671 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
672 Assert(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW);
673# endif /* VBOX_STRICT */
674 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
675 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
676 return VINF_SUCCESS;
677 }
678 }
679
680# if PGM_WITH_PAGING(PGM_GST_TYPE)
681# ifdef VBOX_STRICT
682 /*
683 * Check for VMM page flags vs. Guest page flags consistency.
684 * Currently only for debug purposes.
685 */
686 if (VBOX_SUCCESS(rc))
687 {
688 /* Get guest page flags. */
689 uint64_t fPageGst;
690 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
691 if (VBOX_SUCCESS(rc))
692 {
693 uint64_t fPageShw;
694 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
695
696 /*
697 * Compare page flags.
698 * Note: we have AVL, A, D bits desynched.
699 */
700 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
701 ("Page flags mismatch! pvFault=%p GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
702 }
703 else
704 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
705 }
706 else
707 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
708# endif /* VBOX_STRICT */
709# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
710 }
711 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
712# endif /* PGM_OUT_OF_SYNC_IN_GC */
713 }
714 else
715 {
716 /*
717 * Page not present in Guest OS or invalid page table address.
718 * This is potential virtual page access handler food.
719 *
720 * For the present we'll say that our access handlers don't
721 * work for this case - we've already discarded the page table
722 * not present case which is identical to this.
723 *
724 * When we perchance find we need this, we will probably have AVL
725 * trees (offset based) to operate on and we can measure their speed
726 * against mapping a page table and probably rearrange this handling
727 * a bit. (Like, searching virtual ranges before checking the
728 * physical address.)
729 */
730 }
731 }
732
733
734# if PGM_WITH_PAGING(PGM_GST_TYPE)
735 /*
736 * Conclusion, this is a guest trap.
737 */
738 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
739 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
740 return VINF_EM_RAW_GUEST_TRAP;
741# else
742 /* present, but not a monitored page; perhaps the guest is probing physical memory */
743 return VINF_EM_RAW_EMULATE_INSTR;
744# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
745
746
747#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
748
749 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
750 return VERR_INTERNAL_ERROR;
751#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
752}
753
754
755/**
756 * Emulation of the invlpg instruction.
757 *
758 *
759 * @returns VBox status code.
760 *
761 * @param pVM VM handle.
762 * @param GCPtrPage Page to invalidate.
763 *
764 * @remark ASSUMES that the guest is updating before invalidating. This order
765 * isn't required by the CPU, so this is speculative and could cause
766 * trouble.
767 *
768 * @todo Flush page or page directory only if necessary!
769 * @todo Add a #define for simply invalidating the page.
770 */
771PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
772{
773#if PGM_GST_TYPE == PGM_TYPE_32BIT \
774 || PGM_GST_TYPE == PGM_TYPE_PAE
775
776 LogFlow(("InvalidatePage %x\n", GCPtrPage));
777 /*
778 * Get the shadow PD entry and skip out if this PD isn't present.
779 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
780 */
781 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
782# if PGM_SHW_TYPE == PGM_TYPE_32BIT
783 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
784# else
785 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
786# endif
787 const SHWPDE PdeDst = *pPdeDst;
788 if (!PdeDst.n.u1Present)
789 {
790 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
791 return VINF_SUCCESS;
792 }
793
794 /*
795 * Get the guest PD entry and calc big page.
796 */
797# if PGM_GST_TYPE == PGM_TYPE_32BIT
798 PX86PD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
799 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
800 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
801# else /* PAE */
802 unsigned iPDSrc;
803 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
804 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
805# endif
806
807 const uint32_t cr4 = CPUMGetGuestCR4(pVM);
808 const bool fIsBigPage = PdeSrc.b.u1Size && (cr4 & X86_CR4_PSE);
809
810# ifdef IN_RING3
811 /*
812 * If a CR3 Sync is pending we may ignore the invalidate page operation
813 * depending on the kind of sync and if it's a global page or not.
814 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
815 */
816# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
817 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
818 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
819 && fIsBigPage
820 && PdeSrc.b.u1Global
821 && (cr4 & X86_CR4_PGE)
822 )
823 )
824# else
825 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
826# endif
827 {
828 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
829 return VINF_SUCCESS;
830 }
831# endif /* IN_RING3 */
832
833
834 /*
835 * Deal with the Guest PDE.
836 */
837 int rc = VINF_SUCCESS;
838 if (PdeSrc.n.u1Present)
839 {
840 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
841 {
842 /*
843 * Conflict - Let SyncPT deal with it to avoid duplicate code.
844 */
845 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
846 Assert(PGMGetGuestMode(pVM) <= PGMMODE_32_BIT);
847 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
848 }
849 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
850 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
851 {
852 /*
853 * Mark not present so we can resync the PDE when it's used.
854 */
855 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
856 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
857 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
858 pPdeDst->u = 0;
859 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
860 PGM_INVL_GUEST_TLBS();
861 }
862# ifdef PGM_SYNC_ACCESSED_BIT
863 else if (!PdeSrc.n.u1Accessed)
864 {
865 /*
866 * Mark not present so we can set the accessed bit.
867 */
868 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
869 pPdeDst->u = 0;
870 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
871 PGM_INVL_GUEST_TLBS();
872 }
873# endif
874 else if (!fIsBigPage)
875 {
876 /*
877 * 4KB - page.
878 */
879 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
880 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
881# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
882 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
883 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
884# endif
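/* The (iPDDst & 1) adjustment above: with a PAE shadow on a 32-bit guest, one
 * 4KB guest page table (1024 entries) is shadowed by two 512-entry PAE page
 * tables, so each shadow page covers either the lower or the upper 2KB half
 * of the guest PT and its GCPhys records which half (a reading of the code,
 * not separate documentation). */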
885 if (pShwPage->GCPhys == GCPhys)
886 {
887# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
888 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
889 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
890 if (pPT->a[iPTEDst].n.u1Present)
891 {
892# ifdef PGMPOOL_WITH_USER_TRACKING
893 /* This is very unlikely with caching/monitoring enabled. */
894 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
895# endif
896 pPT->a[iPTEDst].u = 0;
897 }
898# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
899 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
900 if (VBOX_SUCCESS(rc))
901 rc = VINF_SUCCESS;
902# endif
903 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
904 PGM_INVL_PG(GCPtrPage);
905 }
906 else
907 {
908 /*
909 * The page table address changed.
910 */
911 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
912 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
913 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
914 pPdeDst->u = 0;
915 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
916 PGM_INVL_GUEST_TLBS();
917 }
918 }
919 else
920 {
921 /*
922 * 4MB - page.
923 */
924 /* Before freeing the page, check if anything really changed. */
925 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
926 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
927# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
928 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
929 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
930# endif
931 if ( pShwPage->GCPhys == GCPhys
932 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
933 {
934 /* ASSUMES the given bits are identical for 4M and normal PDEs */
935 /** @todo PAT */
936# ifdef PGM_SYNC_DIRTY_BIT
937 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
938 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
939 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
940 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
941# else
942 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
943 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD)))
944# endif
945 {
946 LogFlow(("Skipping flush for big page containing %VGv (PD=%X)-> nothing has changed!\n", GCPtrPage, iPDSrc));
947 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
948 return VINF_SUCCESS;
949 }
950 }
951
952 /*
953 * Ok, the page table is present and it's been changed in the guest.
954 * If we're in host context, we'll just mark it as not present taking the lazy approach.
955 * We could do this for some flushes in GC too, but we need an algorithm for
956 * deciding which 4MB pages contain code likely to be executed very soon.
957 */
958 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
959 pPdeDst->u = 0;
960 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
961 PGM_INVL_BIG_PG(GCPtrPage);
962 }
963 }
964 else
965 {
966 /*
967 * Page directory is not present, mark shadow PDE not present.
968 */
969 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
970 {
971 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
972 pPdeDst->u = 0;
973 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
974 PGM_INVL_PG(GCPtrPage);
975 }
976 else
977 {
978 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
979 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
980 }
981 }
982
983 return rc;
984
985#elif PGM_GST_TYPE == PGM_TYPE_AMD64
986//# error not implemented
987 return VERR_INTERNAL_ERROR;
988
989#else /* guest real and protected mode */
990 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
991 return VINF_SUCCESS;
992#endif
993}
994
995
996#ifdef PGMPOOL_WITH_USER_TRACKING
997/**
998 * Update the tracking of shadowed pages.
999 *
1000 * @param pVM The VM handle.
1001 * @param pShwPage The shadow page.
1002 * @param HCPhys The physical page that is being dereferenced.
1003 */
1004DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1005{
1006# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1007 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1008 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1009
1010 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1011 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1012 * 2. write protect all shadowed pages. I.e. implement caching.
1013 */
1014 /*
1015 * Find the guest address.
1016 */
1017 for (PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
1018 pRam;
1019 pRam = CTXALLSUFF(pRam->pNext))
1020 {
1021 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1022 while (iPage-- > 0)
1023 {
1024 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1025 {
1026 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1027 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1028 pShwPage->cPresent--;
1029 pPool->cPresent--;
1030 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1031 return;
1032 }
1033 }
1034 }
1035
1036 for (;;)
1037 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1038# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1039 pShwPage->cPresent--;
1040 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1041# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1042}
1043
1044
1045/**
1046 * Update the tracking of shadowed pages.
1047 *
1048 * @param pVM The VM handle.
1049 * @param pShwPage The shadow page.
1050 * @param u16 The top 16 bits of pPage->HCPhys.
1051 * @param pPage Pointer to the guest page; this will be modified.
1052 * @param iPTDst The index into the shadow table.
1053 */
1054DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1055{
1056# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1057 /*
1058 * We're making certain assumptions about the placement of cRef and idx.
1059 */
1060 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1061 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1062
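/* Rough picture of the packing assumed here (inferred from the asserts above
 * and the first-time case below; treat as a sketch): the top 16 bits of
 * pPage->HCPhys start at bit 48 (MM_RAM_FLAGS_IDX_SHIFT); within that field
 * the low bits hold the shadow pool page index and the bits from
 * (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT) upwards hold cRefs. */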
1063 /*
1064 * Just deal with the simple first time here.
1065 */
1066 if (!u16)
1067 {
1068 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1069 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1070 }
1071 else
1072 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1073
1074 /* write back, trying to be clever... */
1075 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%VHp->%VHp iPTDst=%#x\n",
1076 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1077 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1078# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1079
1080 /* update statistics. */
1081 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1082 pShwPage->cPresent++;
1083 if (pShwPage->iFirstPresent > iPTDst)
1084 pShwPage->iFirstPresent = iPTDst;
1085}
1086#endif /* PGMPOOL_WITH_USER_TRACKING */
1087
1088
1089/**
1090 * Creates a 4K shadow page for a guest page.
1091 *
1092 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1093 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1094 * will be mapped in this function.
1095 *
1096 * @param pVM VM handle.
1097 * @param pPteDst Destination page table entry.
1098 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1099 * Can safely assume that only the flags are being used.
1100 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1101 * @param pShwPage Pointer to the shadow page.
1102 * @param iPTDst The index into the shadow table.
1103 *
1104 * @remark Not used for 2/4MB pages!
1105 */
1106DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1107{
1108 if (PteSrc.n.u1Present)
1109 {
1110 /*
1111 * Find the ram range.
1112 */
1113 PPGMPAGE pPage;
1114 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1115 if (VBOX_SUCCESS(rc))
1116 {
1117 /** @todo investigate PWT, PCD and PAT. */
1118 /*
1119 * Make page table entry.
1120 */
1121 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1122 SHWPTE PteDst;
1123 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1124 {
1125 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1126 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1127 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1128 | (HCPhys & X86_PTE_PAE_PG_MASK);
1129 else
1130 {
1131 LogFlow(("SyncPageWorker: monitored page (%VGp) -> mark not present\n", HCPhys));
1132 PteDst.u = 0;
1133 }
1134 /** @todo count these two kinds. */
1135 }
1136 else
1137 {
1138#ifdef PGM_SYNC_DIRTY_BIT
1139# ifdef PGM_SYNC_ACCESSED_BIT
1140 /*
1141 * If the page or page directory entry is not marked accessed,
1142 * we mark the page not present.
1143 */
1144 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1145 {
1146 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1147 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1148 PteDst.u = 0;
1149 }
1150 else
1151# endif
1152 /*
1153 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1154 * when the page is modified.
1155 */
1156 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1157 {
1158 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1159 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1160 | (HCPhys & X86_PTE_PAE_PG_MASK)
1161 | PGM_PTFLAGS_TRACK_DIRTY;
1162 }
1163 else
1164 {
1165 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1166 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1167 | (HCPhys & X86_PTE_PAE_PG_MASK);
1168 }
1169#endif
1170 }
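/* Sketch of the dirty-bit tracking flow set up above (an inference from this
 * file, not a spec): the shadow PTE is left read-only and tagged with
 * PGM_PTFLAGS_TRACK_DIRTY; the guest's first write then faults, CheckPageFault
 * recognizes the marker, sets the dirty/accessed bits in the guest PTE, makes
 * the page writable again in the shadow paging structures and returns
 * VINF_PGM_HANDLED_DIRTY_BIT_FAULT so the instruction is simply restarted. */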
1171
1172#ifdef PGMPOOL_WITH_USER_TRACKING
1173 /*
1174 * Keep user track up to date.
1175 */
1176 if (PteDst.n.u1Present)
1177 {
1178 if (!pPteDst->n.u1Present)
1179 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1180 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1181 {
1182 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1183 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1184 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1185 }
1186 }
1187 else if (pPteDst->n.u1Present)
1188 {
1189 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1190 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1191 }
1192#endif /* PGMPOOL_WITH_USER_TRACKING */
1193
1194 /*
1195 * Update statistics and commit the entry.
1196 */
1197 if (!PteSrc.n.u1Global)
1198 pShwPage->fSeenNonGlobal = true;
1199 *pPteDst = PteDst;
1200 }
1201 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1202 /** @todo count these. */
1203 }
1204 else
1205 {
1206 /*
1207 * Page not-present.
1208 */
1209 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1210#ifdef PGMPOOL_WITH_USER_TRACKING
1211 /* Keep user track up to date. */
1212 if (pPteDst->n.u1Present)
1213 {
1214 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1215 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1216 }
1217#endif /* PGMPOOL_WITH_USER_TRACKING */
1218 pPteDst->u = 0;
1219 /** @todo count these. */
1220 }
1221}
1222
1223
1224/**
1225 * Syncs a guest OS page.
1226 *
1227 * There are no conflicts at this point, neither is there any need for
1228 * page table allocations.
1229 *
1230 * @returns VBox status code.
1231 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1232 * @param pVM VM handle.
1233 * @param PdeSrc Page directory entry of the guest.
1234 * @param GCPtrPage Guest context page address.
1235 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1236 * @param uErr Fault error (X86_TRAP_PF_*).
1237 */
1238PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1239{
1240 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1241
1242#if PGM_GST_TYPE == PGM_TYPE_32BIT \
1243 || PGM_GST_TYPE == PGM_TYPE_PAE
1244
1245 /*
1246 * Assert preconditions.
1247 */
1248 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> X86_PD_SHIFT) & GST_PD_MASK]);
1249 Assert(PdeSrc.n.u1Present);
1250 Assert(cPages);
1251
1252 /*
1253 * Get the shadow PDE, find the shadow page table in the pool.
1254 */
1255 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1256# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1257 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1258# else /* PAE */
1259 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1260# endif
1261 Assert(PdeDst.n.u1Present);
1262 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1263
1264 /*
1265 * Check that the page is present and that the shadow PDE isn't out of sync.
1266 */
1267 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1268 RTGCPHYS GCPhys;
1269 if (!fBigPage)
1270 {
1271 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1272# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1273 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1274 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1275# endif
1276 }
1277 else
1278 {
1279 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1280# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1281 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1282 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1283# endif
1284 }
1285 if ( pShwPage->GCPhys == GCPhys
1286 && PdeSrc.n.u1Present
1287 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1288 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1289# if PGM_GST_TYPE == PGM_TYPE_PAE
1290 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1291# endif
1292 )
1293 {
1294# ifdef PGM_SYNC_ACCESSED_BIT
1295 /*
1296 * Check that the PDE is marked accessed already.
1297 * Since we set the accessed bit *before* getting here on a #PF, this
1298 * check is only meant for dealing with non-#PF'ing paths.
1299 */
1300 if (PdeSrc.n.u1Accessed)
1301# endif
1302 {
1303 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1304 if (!fBigPage)
1305 {
1306 /*
1307 * 4KB Page - Map the guest page table.
1308 */
1309 PGSTPT pPTSrc;
1310 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1311 if (VBOX_SUCCESS(rc))
1312 {
1313# ifdef PGM_SYNC_N_PAGES
1314 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1315 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1316 {
1317 /*
1318 * This code path is currently only taken when the caller is PGMTrap0eHandler
1319 * for non-present pages!
1320 *
1321 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1322 * deal with locality.
1323 */
1324 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1325# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1326 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1327 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1328# else
1329 const unsigned offPTSrc = 0;
1330# endif
1331 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1332 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1333 iPTDst = 0;
1334 else
1335 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1336 for (; iPTDst < iPTDstEnd; iPTDst++)
1337 {
1338 if (!pPTDst->a[iPTDst].n.u1Present)
1339 {
1340 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1341 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1342 NOREF(GCPtrCurPage);
1343#ifndef IN_RING0
1344 /*
1345 * Assuming kernel code will be marked as supervisor (and not as user level
1346 * code executed using a conforming code selector) and marked as read-only.
1347 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1348 */
1349 PPGMPAGE pPage;
1350 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1351 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1352 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)GCPtrCurPage)
1353 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1354 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1355 )
1356#endif /* else: CSAM not active */
1357 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1358 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1359 GCPtrCurPage, PteSrc.n.u1Present,
1360 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1361 PteSrc.n.u1User & PdeSrc.n.u1User,
1362 (uint64_t)PteSrc.u,
1363 (uint64_t)pPTDst->a[iPTDst].u,
1364 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1365 }
1366 }
1367 }
1368 else
1369# endif /* PGM_SYNC_N_PAGES */
1370 {
1371 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1372 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1373 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1374 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1375 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1376 GCPtrPage, PteSrc.n.u1Present,
1377 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1378 PteSrc.n.u1User & PdeSrc.n.u1User,
1379 (uint64_t)PteSrc.u,
1380 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1381 }
1382 }
1383 else /* MMIO or invalid page: emulated in #PF handler. */
1384 {
1385 LogFlow(("PGM_GCPHYS_2_PTR %VGp failed with %Vrc\n", GCPhys, rc));
1386 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1387 }
1388 }
1389 else
1390 {
1391 /*
1392 * 4/2MB page - lazy syncing shadow 4K pages.
1393 * (There are many causes of getting here, it's no longer only CSAM.)
1394 */
1395 /* Calculate the GC physical address of this 4KB shadow page. */
1396 RTGCPHYS GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1397 /* Find ram range. */
1398 PPGMPAGE pPage;
1399 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1400 if (VBOX_SUCCESS(rc))
1401 {
1402 /*
1403 * Make shadow PTE entry.
1404 */
1405 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1406 SHWPTE PteDst;
1407 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1408 | (HCPhys & X86_PTE_PAE_PG_MASK);
1409 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1410 {
1411 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1412 PteDst.n.u1Write = 0;
1413 else
1414 PteDst.u = 0;
1415 }
1416 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1417# ifdef PGMPOOL_WITH_USER_TRACKING
1418 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1419 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1420# endif
1421 pPTDst->a[iPTDst] = PteDst;
1422
1423
1424# ifdef PGM_SYNC_DIRTY_BIT
1425 /*
1426 * If the page is not flagged as dirty and is writable, then make it read-only
1427 * at PD level, so we can set the dirty bit when the page is modified.
1428 *
1429 * ASSUMES that page access handlers are implemented on page table entry level.
1430 * Thus we will first catch the dirty access and set PDE.D and restart. If
1431 * there is an access handler, we'll trap again and let it work on the problem.
1432 */
1433 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1434 * As for invlpg, it simply frees the whole shadow PT.
1435 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1436 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1437 {
1438 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1439 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1440 PdeDst.n.u1Write = 0;
1441 }
1442 else
1443 {
1444 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1445 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1446 }
1447# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1448 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1449# else /* PAE */
1450 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1451# endif
1452# endif /* PGM_SYNC_DIRTY_BIT */
1453 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1454 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1455 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1456 }
1457 else
1458 LogFlow(("PGM_GCPHYS_2_PTR %VGp (big) failed with %Vrc\n", GCPhys, rc));
1459 }
1460 return VINF_SUCCESS;
1461 }
1462# ifdef PGM_SYNC_ACCESSED_BIT
1463 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1464#endif
1465 }
1466 else
1467 {
1468 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1469 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1470 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1471 }
1472
1473 /*
1474 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1475 * Yea, I'm lazy.
1476 */
1477 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1478# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1479 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1480# else /* PAE */
1481 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1482# endif
1483 PGM_INVL_GUEST_TLBS();
1484 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1485
1486#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
1487
1488# ifdef PGM_SYNC_N_PAGES
1489 /*
1490 * Get the shadow PDE, find the shadow page table in the pool.
1491 */
1492 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1493# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1494 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1495# else /* PAE */
1496 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1497# endif
1498 Assert(PdeDst.n.u1Present);
1499 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1500 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1501
1502# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1503 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1504 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1505# else
1506 const unsigned offPTSrc = 0;
1507# endif
1508
1509 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1510 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1511 {
1512 /*
1513 * This code path is currently only taken when the caller is PGMTrap0eHandler
1514 * for non-present pages!
1515 *
1516          * We sync a window of PGM_SYNC_NR_PAGES pages around the faulting page to
1517          * take advantage of locality.
1518 */
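         /* Worked example of the window computed below (illustrative only, assuming
            PGM_SYNC_NR_PAGES were 8; the real value is defined elsewhere in PGM): a fault
            on iPTDst=100 syncs entries [96..104), while a fault on iPTDst=2 is clamped to
            [0..6) because the end index is computed before the start is lowered to 0. */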
1519 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1520 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1521 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1522 iPTDst = 0;
1523 else
1524 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1525 for (; iPTDst < iPTDstEnd; iPTDst++)
1526 {
1527 if (!pPTDst->a[iPTDst].n.u1Present)
1528 {
1529 GSTPTE PteSrc;
1530
1531 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1532
1533 /* Fake the page table entry */
1534 PteSrc.u = GCPtrCurPage;
1535 PteSrc.n.u1Present = 1;
1536 PteSrc.n.u1Dirty = 1;
1537 PteSrc.n.u1Accessed = 1;
1538 PteSrc.n.u1Write = 1;
1539 PteSrc.n.u1User = 1;
1540
1541 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1542
1543 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1544 GCPtrCurPage, PteSrc.n.u1Present,
1545 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1546 PteSrc.n.u1User & PdeSrc.n.u1User,
1547 (uint64_t)PteSrc.u,
1548 (uint64_t)pPTDst->a[iPTDst].u,
1549 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1550 }
1551 }
1552 }
1553 else
1554# endif /* PGM_SYNC_N_PAGES */
1555 {
1556 GSTPTE PteSrc;
1557 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1558 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1559
1560 /* Fake the page table entry */
1561 PteSrc.u = GCPtrCurPage;
1562 PteSrc.n.u1Present = 1;
1563 PteSrc.n.u1Dirty = 1;
1564 PteSrc.n.u1Accessed = 1;
1565 PteSrc.n.u1Write = 1;
1566 PteSrc.n.u1User = 1;
1567 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1568
1569 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1570 GCPtrPage, PteSrc.n.u1Present,
1571 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1572 PteSrc.n.u1User & PdeSrc.n.u1User,
1573 (uint64_t)PteSrc.u,
1574 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1575 }
1576 return VINF_SUCCESS;
1577
1578#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1579    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1580 return VERR_INTERNAL_ERROR;
1581#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1582}
1583
1584
1585
1586#if PGM_WITH_PAGING(PGM_GST_TYPE)
1587
1588# ifdef PGM_SYNC_DIRTY_BIT
1589
1590/**
1591 * Investigate page fault and handle write protection page faults caused by
1592 * dirty bit tracking.
1593 *
1594 * @returns VBox status code.
1595 * @param pVM VM handle.
1596 * @param uErr Page fault error code.
1597 * @param pPdeDst Shadow page directory entry.
1598 * @param pPdeSrc Guest page directory entry.
1599 * @param GCPtrPage Guest context page address.
1600 */
1601PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1602{
1603 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1604 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1605
1606 /*
1607 * Real page fault?
1608 */
1609 if ( (uErr & X86_TRAP_PF_RSVD)
1610 || !pPdeSrc->n.u1Present
1611# if PGM_WITH_NX(PGM_GST_TYPE)
1612 || ((uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
1613# endif
1614 || ((uErr & X86_TRAP_PF_RW) && !pPdeSrc->n.u1Write)
1615 || ((uErr & X86_TRAP_PF_US) && !pPdeSrc->n.u1User) )
1616 {
1617# ifdef IN_GC
1618 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1619# endif
1620 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1621 LogFlow(("CheckPageFault: real page fault at %VGv (1)\n", GCPtrPage));
1622
1623 if (pPdeSrc->n.u1Present)
1624 {
1625 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1626 * See the 2nd case below as well.
1627 */
1628 if (pPdeSrc->b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE))
1629 {
1630 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1631 }
1632 else
1633 {
1634 /*
1635 * Map the guest page table.
1636 */
1637 PGSTPT pPTSrc;
1638 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
1639 if (VBOX_SUCCESS(rc))
1640 {
1641 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> PAGE_SHIFT) & GST_PT_MASK];
1642 const GSTPTE PteSrc = *pPteSrc;
1643 if (pPteSrc->n.u1Present)
1644 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1645 }
1646 AssertRC(rc);
1647 }
1648 }
1649 return VINF_EM_RAW_GUEST_TRAP;
1650 }
1651
1652 /*
1653 * First check the easy case where the page directory has been marked read-only to track
1654 * the dirty bit of an emulated BIG page
1655 */
1656 if (pPdeSrc->b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE))
1657 {
1658 /* Mark guest page directory as accessed */
1659 pPdeSrc->b.u1Accessed = 1;
1660
1661 /*
1662 * Only write protection page faults are relevant here.
1663 */
1664 if (uErr & X86_TRAP_PF_RW)
1665 {
1666 /* Mark guest page directory as dirty (BIG page only). */
1667 pPdeSrc->b.u1Dirty = 1;
1668
1669 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1670 {
1671 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1672
1673 Assert(pPdeSrc->b.u1Write);
1674
1675 pPdeDst->n.u1Write = 1;
1676 pPdeDst->n.u1Accessed = 1;
1677 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1678 PGM_INVL_BIG_PG(GCPtrPage);
1679 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1680 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1681 }
1682 }
1683 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1684 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1685 }
1686 /* else: 4KB page table */
1687
1688 /*
1689 * Map the guest page table.
1690 */
1691 PGSTPT pPTSrc;
1692 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
1693 if (VBOX_SUCCESS(rc))
1694 {
1695 /*
1696 * Real page fault?
1697 */
1698 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> PAGE_SHIFT) & GST_PT_MASK];
1699 const GSTPTE PteSrc = *pPteSrc;
1700 if ( !PteSrc.n.u1Present
1701# if PGM_WITH_NX(PGM_GST_TYPE)
1702         || ((uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
1703# endif
1704 || ((uErr & X86_TRAP_PF_RW) && !PteSrc.n.u1Write)
1705 || ((uErr & X86_TRAP_PF_US) && !PteSrc.n.u1User)
1706 )
1707 {
1708# ifdef IN_GC
1709 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1710# endif
1711 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1712 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
1713
1714 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1715          * See the 1st case above as well.
1716 */
1717 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
1718 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1719
1720 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1721 return VINF_EM_RAW_GUEST_TRAP;
1722 }
1723 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
1724
1725 /*
1726 * Set the accessed bits in the page directory and the page table.
1727 */
1728 pPdeSrc->n.u1Accessed = 1;
1729 pPteSrc->n.u1Accessed = 1;
1730
1731 /*
1732 * Only write protection page faults are relevant here.
1733 */
1734 if (uErr & X86_TRAP_PF_RW)
1735 {
1736 /* Write access, so mark guest entry as dirty. */
1737# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
1738 if (!pPteSrc->n.u1Dirty)
1739 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
1740 else
1741 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
1742# endif
1743 pPteSrc->n.u1Dirty = 1;
1744
1745 if (pPdeDst->n.u1Present)
1746 {
1747 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
1748 * Our individual shadow handlers will provide more information and force a fatal exit.
1749 */
1750 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
1751 {
1752 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
1753 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1754 return VINF_SUCCESS;
1755 }
1756
1757 /*
1758 * Map shadow page table.
1759 */
1760 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1761 if (pShwPage)
1762 {
1763 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1764 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1765 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
1766 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
1767 {
1768 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
1769# ifdef VBOX_STRICT
1770 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
1771 if (pPage)
1772 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
1773 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
1774# endif
1775 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1776
1777 Assert(pPteSrc->n.u1Write);
1778
1779 pPteDst->n.u1Write = 1;
1780 pPteDst->n.u1Dirty = 1;
1781 pPteDst->n.u1Accessed = 1;
1782 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
1783 PGM_INVL_PG(GCPtrPage);
1784
1785 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1786 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1787 }
1788 }
1789 else
1790 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
1791 }
1792 }
1793/** @todo Optimize accessed bit emulation? */
1794# ifdef VBOX_STRICT
1795 /*
1796 * Sanity check.
1797 */
1798 else if ( !pPteSrc->n.u1Dirty
1799 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
1800 && pPdeDst->n.u1Present)
1801 {
1802 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1803 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1804 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1805 if ( pPteDst->n.u1Present
1806 && pPteDst->n.u1Write)
1807 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
1808 }
1809# endif /* VBOX_STRICT */
1810 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1811 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1812 }
1813 AssertRC(rc);
1814 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1815 return rc;
1816}
1817
1818# endif
1819
1820#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
1821
1822
1823/**
1824 * Sync a shadow page table.
1825 *
1826 * The shadow page table is not present. This includes the case where
1827 * there is a conflict with a mapping.
1828 *
1829 * @returns VBox status code.
1830 * @param pVM VM handle.
1831 * @param iPDSrc Page directory index.
1832 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
1833 * Assume this is a temporary mapping.
1834 * @param GCPtrPage GC Pointer of the page that caused the fault
1835 */
1836PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage)
1837{
1838 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1839 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
1840 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
1841
1842#if PGM_GST_TYPE == PGM_TYPE_32BIT \
1843 || PGM_GST_TYPE == PGM_TYPE_PAE
1844
1845 /*
1846 * Validate input a little bit.
1847 */
1848 Assert(iPDSrc == (GCPtrPage >> GST_PD_SHIFT));
1849# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1850 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
1851# else
1852 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
1853# endif
1854 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1855 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
1856 SHWPDE PdeDst = *pPdeDst;
1857
1858# if PGM_GST_TYPE == PGM_TYPE_32BIT
1859 /*
1860 * Check for conflicts.
1861 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
1862 * HC: Simply resolve the conflict.
1863 */
1864 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1865 {
1866 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1867# ifndef IN_RING3
1868 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
1869 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1870 return VERR_ADDRESS_CONFLICT;
1871# else
1872 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
1873 Assert(pMapping);
1874 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPDSrc);
1875 if (VBOX_FAILURE(rc))
1876 {
1877 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1878 return rc;
1879 }
1880 PdeDst = *pPdeDst;
1881# endif
1882 }
1883# else /* PGM_GST_TYPE == PGM_TYPE_32BIT */
1884 /* PAE and AMD64 modes are hardware accelerated only, so there are no mappings. */
1885 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
1886# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
1887 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
1888
1889 /*
1890 * Sync page directory entry.
1891 */
1892 int rc = VINF_SUCCESS;
1893 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1894 if (PdeSrc.n.u1Present)
1895 {
1896 /*
1897 * Allocate & map the page table.
1898 */
1899 PSHWPT pPTDst;
1900 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1901 PPGMPOOLPAGE pShwPage;
1902 RTGCPHYS GCPhys;
1903 if (fPageTable)
1904 {
1905 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1906# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1907 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1908 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1909# endif
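                /* Note: PAGE_SIZE / 2 is 2048 bytes, i.e. 512 4-byte guest PTEs - exactly the half
                   of the 1024-entry 32-bit guest page table that this 512-entry PAE shadow PT
                   shadows; an odd shadow PD index (iPDDst & 1) selects the upper half. */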
1910 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
1911 }
1912 else
1913 {
1914 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1915# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1916            /* Select the right PDE as we're emulating a 4MB page directory entry with two 2MB shadow PDEs. */
1917 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1918# endif
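                /* Note: bit X86_PD_PAE_SHIFT (bit 21) of the faulting address selects which 2MB half
                   of the guest 4MB page this shadow page table covers, so each half gets its own
                   GCPhys key in the pool. */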
1919 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
1920 }
1921 if (rc == VINF_SUCCESS)
1922 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1923 else if (rc == VINF_PGM_CACHED_PAGE)
1924 {
1925 /*
1926 * The PT was cached, just hook it up.
1927 */
1928 if (fPageTable)
1929 PdeDst.u = pShwPage->Core.Key
1930 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1931 else
1932 {
1933 PdeDst.u = pShwPage->Core.Key
1934 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1935# ifdef PGM_SYNC_DIRTY_BIT /* (see explanation and assumptions further down.) */
1936 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1937 {
1938 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1939 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1940 PdeDst.b.u1Write = 0;
1941 }
1942# endif
1943 }
1944 *pPdeDst = PdeDst;
1945 return VINF_SUCCESS;
1946 }
1947 else if (rc == VERR_PGM_POOL_FLUSHED)
1948 return VINF_PGM_SYNC_CR3;
1949 else
1950 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
1951 PdeDst.u &= X86_PDE_AVL_MASK;
1952 PdeDst.u |= pShwPage->Core.Key;
1953
1954# ifdef PGM_SYNC_DIRTY_BIT
1955 /*
1956 * Page directory has been accessed (this is a fault situation, remember).
1957 */
1958 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
1959# endif
1960 if (fPageTable)
1961 {
1962 /*
1963 * Page table - 4KB.
1964 *
1965 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
1966 */
1967 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
1968 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
1969 PGSTPT pPTSrc;
1970 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1971 if (VBOX_SUCCESS(rc))
1972 {
1973 /*
1974 * Start by syncing the page directory entry so CSAM's TLB trick works.
1975 */
1976 PdeDst.u = (PdeDst.u & (X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK))
1977 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1978 *pPdeDst = PdeDst;
1979
1980 /*
1981 * Directory/page user or supervisor privilege: (same goes for read/write)
1982 *
1983 * Directory Page Combined
1984 * U/S U/S U/S
1985 * 0 0 0
1986 * 0 1 0
1987 * 1 0 0
1988 * 1 1 1
1989 *
1990 * Simple AND operation. Table listed for completeness.
1991 *
1992 */
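            /* Example: a guest PDE with U/S=1 over a PTE with U/S=0 yields a supervisor-only shadow
               mapping, and PDE R/W=1 over PTE R/W=0 yields a read-only one; the Log2 statements
               below print exactly these ANDed R/W and U/S values per synced entry. */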
1993 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
1994# ifdef PGM_SYNC_N_PAGES
1995 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1996 unsigned iPTDst = iPTBase;
1997 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1998 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
1999 iPTDst = 0;
2000 else
2001 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2002# else /* !PGM_SYNC_N_PAGES */
2003 unsigned iPTDst = 0;
2004 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2005# endif /* !PGM_SYNC_N_PAGES */
2006# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2007 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2008 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2009# else
2010 const unsigned offPTSrc = 0;
2011# endif
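            /* Illustrative: with a 32-bit guest and PAE shadow the guest PT has 1024 entries but each
               shadow PT only 512, so the guest index is iPTDst + offPTSrc, where offPTSrc is 0 for the
               lower 2MB half and 512 for the upper half (bit 21 of GCPtrPage). */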
2012 for (; iPTDst < iPTDstEnd; iPTDst++)
2013 {
2014 const unsigned iPTSrc = iPTDst + offPTSrc;
2015 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2016
2017 if (PteSrc.n.u1Present) /* we've already cleared it above */
2018 {
2019# ifndef IN_RING0
2020 /*
2021 * Assuming kernel code will be marked as supervisor - and not as user level
2022                 * and executed using a conforming code selector - and marked as readonly.
2023 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2024 */
2025 PPGMPAGE pPage;
2026 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2027 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2028 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2029 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2030 )
2031# endif
2032 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2033 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2034 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2035 PteSrc.n.u1Present,
2036 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2037 PteSrc.n.u1User & PdeSrc.n.u1User,
2038 (uint64_t)PteSrc.u,
2039 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2040 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2041 }
2042 } /* for PTEs */
2043 }
2044 }
2045 else
2046 {
2047 /*
2048 * Big page - 2/4MB.
2049 *
2050 * We'll walk the ram range list in parallel and optimize lookups.
2051          * We will only sync one shadow page table at a time.
2052 */
2053 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2054
2055 /**
2056          * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4KB page tables).
2057 */
2058
2059 /*
2060 * Start by syncing the page directory entry.
2061 */
2062 PdeDst.u = (PdeDst.u & (X86_PDE_PAE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2063 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2064
2065# ifdef PGM_SYNC_DIRTY_BIT
2066 /*
2067 * If the page is not flagged as dirty and is writable, then make it read-only
2068 * at PD level, so we can set the dirty bit when the page is modified.
2069 *
2070 * ASSUMES that page access handlers are implemented on page table entry level.
2071 * Thus we will first catch the dirty access and set PDE.D and restart. If
2072 * there is an access handler, we'll trap again and let it work on the problem.
2073 */
2074 /** @todo move the above stuff to a section in the PGM documentation. */
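        /* Life cycle of the PGM_PDFLAGS_TRACK_DIRTY marker set below: the guest's first write to
           the big page faults on the read-only shadow PDE, CheckPageFault() above spots the marker,
           sets the guest PDE accessed/dirty bits, makes the shadow PDE writable again, clears the
           marker and lets the instruction restart. */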
2075 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2076 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2077 {
2078 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2079 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2080 PdeDst.b.u1Write = 0;
2081 }
2082# endif /* PGM_SYNC_DIRTY_BIT */
2083 *pPdeDst = PdeDst;
2084
2085 /*
2086 * Fill the shadow page table.
2087 */
2088 /* Get address and flags from the source PDE. */
2089 SHWPTE PteDstBase;
2090 PteDstBase.u = PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2091
2092 /* Loop thru the entries in the shadow PT. */
2093 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2094 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2095 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2096 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2097 PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
2098 unsigned iPTDst = 0;
2099 while (iPTDst < ELEMENTS(pPTDst->a))
2100 {
2101 /* Advance ram range list. */
2102 while (pRam && GCPhys > pRam->GCPhysLast)
2103 pRam = CTXALLSUFF(pRam->pNext);
2104 if (pRam && GCPhys >= pRam->GCPhys)
2105 {
2106 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2107 do
2108 {
2109 /* Make shadow PTE. */
2110 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2111 SHWPTE PteDst;
2112
2113 /* Make sure the RAM has already been allocated. */
2114 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2115 {
2116 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2117 {
2118# ifdef IN_RING3
2119 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2120# else
2121 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2122# endif
2123 if (rc != VINF_SUCCESS)
2124 return rc;
2125 }
2126 }
2127
2128 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2129 {
2130 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2131 {
2132 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2133 PteDst.n.u1Write = 0;
2134 }
2135 else
2136 PteDst.u = 0;
2137 }
2138# ifndef IN_RING0
2139 /*
2140 * Assuming kernel code will be marked as supervisor and not as user level and executed
2141 * using a conforming code selector. Don't check for readonly, as that implies the whole
2142 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2143 */
2144 else if ( !PdeSrc.n.u1User
2145 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2146 PteDst.u = 0;
2147# endif
2148 else
2149 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2150# ifdef PGMPOOL_WITH_USER_TRACKING
2151 if (PteDst.n.u1Present)
2152 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2153# endif
2154 /* commit it */
2155 pPTDst->a[iPTDst] = PteDst;
2156 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2157 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2158 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2159
2160 /* advance */
2161 GCPhys += PAGE_SIZE;
2162 iHCPage++;
2163 iPTDst++;
2164 } while ( iPTDst < ELEMENTS(pPTDst->a)
2165 && GCPhys <= pRam->GCPhysLast);
2166 }
2167 else if (pRam)
2168 {
2169 Log(("Invalid pages at %VGp\n", GCPhys));
2170 do
2171 {
2172 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2173 GCPhys += PAGE_SIZE;
2174 iPTDst++;
2175 } while ( iPTDst < ELEMENTS(pPTDst->a)
2176 && GCPhys < pRam->GCPhys);
2177 }
2178 else
2179 {
2180 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2181 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2182 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2183 }
2184 } /* while more PTEs */
2185 } /* 4KB / 4MB */
2186 }
2187 else
2188 AssertRelease(!PdeDst.n.u1Present);
2189
2190 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2191# ifdef IN_GC
2192 if (VBOX_FAILURE(rc))
2193 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2194# endif
2195 return rc;
2196
2197#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
2198
2199 int rc = VINF_SUCCESS;
2200
2201 /*
2202 * Validate input a little bit.
2203 */
2204# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2205 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2206# else
2207 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2208# endif
2209 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2210 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2211 SHWPDE PdeDst = *pPdeDst;
2212
2213 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2214 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2215
2216 GSTPDE PdeSrc;
2217 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2218 PdeSrc.n.u1Present = 1;
2219 PdeSrc.n.u1Write = 1;
2220 PdeSrc.n.u1Accessed = 1;
2221 PdeSrc.n.u1User = 1;
2222
2223 /*
2224 * Allocate & map the page table.
2225 */
2226 PSHWPT pPTDst;
2227 PPGMPOOLPAGE pShwPage;
2228 RTGCPHYS GCPhys;
2229
2230 /* Virtual address = physical address */
2231 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK_32;
2232 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2233
2234 if ( rc == VINF_SUCCESS
2235 || rc == VINF_PGM_CACHED_PAGE)
2236 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2237 else
2238 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2239
2240 PdeDst.u &= X86_PDE_AVL_MASK;
2241 PdeDst.u |= pShwPage->Core.Key;
2242 PdeDst.n.u1Present = 1;
2243 *pPdeDst = PdeDst;
2244
2245 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2246 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2247 return rc;
2248
2249#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2250 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2251 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2252 return VERR_INTERNAL_ERROR;
2253#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2254}
2255
2256
2257
2258/**
2259 * Prefetch a page/set of pages.
2260 *
2261 * Typically used to sync commonly used pages before entering raw mode
2262 * after a CR3 reload.
2263 *
2264 * @returns VBox status code.
2265 * @param pVM VM handle.
2266 * @param GCPtrPage Page to prefetch.
2267 */
2268PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2269{
2270#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2271 /*
2272 * Check that all Guest levels thru the PDE are present, getting the
2273 * PD and PDE in the process.
2274 */
2275 int rc = VINF_SUCCESS;
2276# if PGM_WITH_PAGING(PGM_GST_TYPE)
2277# if PGM_GST_TYPE == PGM_TYPE_32BIT
2278 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2279 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2280# else /* PAE */
2281 unsigned iPDSrc;
2282 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2283# endif
2284 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2285# else
2286 PGSTPD pPDSrc = NULL;
2287 const unsigned iPDSrc = 0;
2288 GSTPDE PdeSrc;
2289
2290 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2291 PdeSrc.n.u1Present = 1;
2292 PdeSrc.n.u1Write = 1;
2293 PdeSrc.n.u1Accessed = 1;
2294 PdeSrc.n.u1User = 1;
2295# endif
2296
2297# ifdef PGM_SYNC_ACCESSED_BIT
2298 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2299# else
2300 if (PdeSrc.n.u1Present)
2301# endif
2302 {
2303# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2304 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2305# else
2306 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2307# endif
2308 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2309 {
2310 if (!PdeDst.n.u1Present)
2311 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2312 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2313 else
2314 {
2315 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2316 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2317 * makes no sense to prefetch more than one page.
2318 */
2319 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2320 if (VBOX_SUCCESS(rc))
2321 rc = VINF_SUCCESS;
2322 }
2323 }
2324 }
2325 return rc;
2326
2327#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2328
2329 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2330 return VERR_INTERNAL_ERROR;
2331#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2332}
2333
2334
2335
2336
2337/**
2338 * Syncs a page during a PGMVerifyAccess() call.
2339 *
2340 * @returns VBox status code (informational included).
2341 * @param GCPtrPage The address of the page to sync.
2342 * @param fPage The effective guest page flags.
2343 * @param uErr The trap error code.
2344 */
2345PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2346{
2347 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2348
2349#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2350
2351# ifndef IN_RING0
2352 if (!(fPage & X86_PTE_US))
2353 {
2354 /*
2355 * Mark this page as safe.
2356 */
2357 /** @todo not correct for pages that contain both code and data!! */
2358 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2359 CSAMMarkPage(pVM, (RTGCPTR)GCPtrPage, true);
2360 }
2361# endif
2362 /*
2363 * Get guest PD and index.
2364 */
2365
2366# if PGM_WITH_PAGING(PGM_GST_TYPE)
2367# if PGM_GST_TYPE == PGM_TYPE_32BIT
2368 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2369 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2370# else /* PAE */
2371 unsigned iPDSrc;
2372 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2373# endif
2374# else
2375 PGSTPD pPDSrc = NULL;
2376 const unsigned iPDSrc = 0;
2377# endif
2378 int rc = VINF_SUCCESS;
2379
2380 /*
2381 * First check if the shadow pd is present.
2382 */
2383# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2384 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2385# else
2386 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2387# endif
2388 if (!pPdeDst->n.u1Present)
2389 {
2390 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2391 AssertRC(rc);
2392 if (rc != VINF_SUCCESS)
2393 return rc;
2394 }
2395
2396# if PGM_WITH_PAGING(PGM_GST_TYPE)
2397 /* Check for dirty bit fault */
2398 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2399 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2400 Log(("PGMVerifyAccess: success (dirty)\n"));
2401 else
2402 {
2403 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2404#else
2405 {
2406 GSTPDE PdeSrc;
2407 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2408 PdeSrc.n.u1Present = 1;
2409 PdeSrc.n.u1Write = 1;
2410 PdeSrc.n.u1Accessed = 1;
2411 PdeSrc.n.u1User = 1;
2412
2413#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2414 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2415 if (uErr & X86_TRAP_PF_US)
2416 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2417 else /* supervisor */
2418 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2419
2420 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2421 if (VBOX_SUCCESS(rc))
2422 {
2423 /* Page was successfully synced */
2424 Log2(("PGMVerifyAccess: success (sync)\n"));
2425 rc = VINF_SUCCESS;
2426 }
2427 else
2428 {
2429 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2430 return VINF_EM_RAW_GUEST_TRAP;
2431 }
2432 }
2433 return rc;
2434
2435#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2436
2437 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2438 return VERR_INTERNAL_ERROR;
2439#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2440}
2441
2442
2443#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
2444# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2445/**
2446 * Figures out which kind of shadow page this guest PDE warrants.
2447 *
2448 * @returns Shadow page kind.
2449 * @param pPdeSrc The guest PDE in question.
2450 * @param cr4 The current guest cr4 value.
2451 */
2452DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
2453{
2454 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2455 return BTH_PGMPOOLKIND_PT_FOR_PT;
2456 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2457 //{
2458 // case 0:
2459 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2460 // case X86_PDE4M_RW:
2461 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2462 // case X86_PDE4M_US:
2463 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2464 // case X86_PDE4M_RW | X86_PDE4M_US:
2465 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2466# if 0
2467 // case X86_PDE4M_PAE_NX:
2468 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2469 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2470 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2471 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2472 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2473 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2474 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2475# endif
2476 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2477 //}
2478}
2479# endif
2480#endif
2481
2482#undef MY_STAM_COUNTER_INC
2483#define MY_STAM_COUNTER_INC(a) do { } while (0)
2484
2485
2486/**
2487 * Syncs the paging hierarchy starting at CR3.
2488 *
2489 * @returns VBox status code, no specials.
2490 * @param pVM The virtual machine.
2491 * @param cr0 Guest context CR0 register
2492 * @param cr3 Guest context CR3 register
2493 * @param cr4 Guest context CR4 register
2494 * @param fGlobal Including global page directories or not
2495 */
2496PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint32_t cr0, uint32_t cr3, uint32_t cr4, bool fGlobal)
2497{
2498 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
2499 fGlobal = true; /* Change this CR3 reload to be a global one. */
2500
2501 /*
2502 * Update page access handlers.
2503 * The virtual ones are always flushed, while the physical ones are flushed only on demand.
2504 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
2505 * have to look into that later because it will have a bad influence on performance.
2506 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
2507 * bird: Yes, but that won't work for aliases.
2508 */
2509 /** @todo this MUST go away. See #1557. */
2510 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2511 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
2512 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2513
2514#ifdef PGMPOOL_WITH_MONITORING
2515 /*
2516 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2517 * Occasionally we will have to clear all the shadow page tables because we wanted
2518 * to monitor a page which was mapped by too many shadowed page tables. This operation
2519 * is sometimes referred to as a 'lightweight flush'.
2520 */
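    /* I.e. the common path below only resets the modification counters; the PGM_SYNC_CLEAR_PGM_POOL
       path clears the whole pool, and outside ring-3 that work is deferred to ring-3 by returning
       VINF_PGM_SYNC_CR3. */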
2521 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2522 pgmPoolMonitorModifiedClearAll(pVM);
2523 else
2524 {
2525# ifdef IN_RING3
2526 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2527 pgmPoolClearAll(pVM);
2528# else
2529 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2530 return VINF_PGM_SYNC_CR3;
2531# endif
2532 }
2533#endif
2534
2535 Assert(fGlobal || (cr4 & X86_CR4_PGE));
2536 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
2537
2538#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
2539 /*
2540 * Get page directory addresses.
2541 */
2542# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2543 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
2544# else
2545 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
2546# endif
2547
2548# if PGM_GST_TYPE == PGM_TYPE_32BIT
2549 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2550 Assert(pPDSrc);
2551# ifndef IN_GC
2552 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
2553# endif
2554# endif
2555
2556 /*
2557 * Iterate the page directory.
2558 */
2559 PPGMMAPPING pMapping;
2560 unsigned iPdNoMapping;
2561 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
2562 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2563
2564 /* Only check mappings if they are supposed to be put into the shadow page table. */
2565 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
2566 {
2567 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2568 iPdNoMapping = (pMapping) ? pMapping->GCPtr >> X86_PD_SHIFT : ~0U; /** PAE todo */
2569 }
2570 else
2571 {
2572 pMapping = 0;
2573 iPdNoMapping = ~0U;
2574 }
2575# if PGM_GST_TYPE == PGM_TYPE_PAE
2576 for (unsigned iPDPTRE = 0; iPDPTRE < X86_PG_PAE_PDPTE_ENTRIES; iPDPTRE++)
2577# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2578 for (unsigned iPDPTRE = 0; iPDPTRE < X86_PG_AMD64_PDPTE_ENTRIES; iPDPTRE++)
2579# endif
2580 {
2581# if PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2582 unsigned iPDSrc;
2583# if PGM_SHW_TYPE == PGM_TYPE_PAE
2584 PX86PDPAE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[iPDPTRE * X86_PG_PAE_ENTRIES];
2585# else
2586 AssertFailed(); /* @todo */
2587 PX86PDPE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[iPDPTRE * X86_PG_AMD64_ENTRIES];
2588# endif
2589 PX86PDEPAE pPDEDst = &pPDPAE->a[0];
2590 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPDPTRE << X86_PDPTR_SHIFT, &iPDSrc);
2591
2592 if (pPDSrc == NULL)
2593 {
2594 /* PDPTR not present */
2595 pVM->pgm.s.CTXMID(p,PaePDPTR)->a[iPDPTRE].n.u1Present = 0;
2596 continue;
2597 }
2598# endif /* if PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 */
2599 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2600 {
2601# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2602 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
2603# else
2604 Assert(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst);
2605# endif
2606 register GSTPDE PdeSrc = pPDSrc->a[iPD];
2607 if ( PdeSrc.n.u1Present
2608 && (PdeSrc.n.u1User || fRawR0Enabled))
2609 {
2610# if PGM_GST_TYPE == PGM_TYPE_32BIT
2611 /*
2612 * Check for conflicts with GC mappings.
2613 */
2614 if (iPD == iPdNoMapping)
2615 {
2616 if (pVM->pgm.s.fMappingsFixed)
2617 {
2618 /* It's fixed, just skip the mapping. */
2619 const unsigned cPTs = pMapping->cPTs;
2620 iPD += cPTs - 1;
2621 pPDEDst += cPTs + (PGM_SHW_TYPE != PGM_TYPE_32BIT) * cPTs;
2622 pMapping = pMapping->CTXALLSUFF(pNext);
2623 iPdNoMapping = pMapping ? pMapping->GCPtr >> X86_PD_SHIFT : ~0U;
2624 continue;
2625 }
2626# ifdef IN_RING3
2627 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD);
2628 if (VBOX_FAILURE(rc))
2629 return rc;
2630
2631 /*
2632 * Update iPdNoMapping and pMapping.
2633 */
2634 pMapping = pVM->pgm.s.pMappingsR3;
2635 while (pMapping && pMapping->GCPtr < (iPD << X86_PD_SHIFT))
2636 pMapping = pMapping->pNextR3;
2637 iPdNoMapping = pMapping ? pMapping->GCPtr >> X86_PD_SHIFT : ~0U;
2638# else
2639 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2640 return VINF_PGM_SYNC_CR3;
2641# endif
2642 }
2643# else /* PGM_GST_TYPE == PGM_TYPE_32BIT */
2644 /* PAE and AMD64 modes are hardware accelerated only, so there are no mappings. */
2645 Assert(iPD != iPdNoMapping);
2646# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
2647 /*
2648 * Sync page directory entry.
2649 *
2650                  * The current approach is to allocate the page table but to set
2651 * the entry to not-present and postpone the page table synching till
2652 * it's actually used.
2653 */
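                /* The postponed filling is done by SyncPT() above: the first #PF on an address
                   covered by a not-present shadow PDE allocates and populates the shadow PT. */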
2654# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2655 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2656# else
2657 const unsigned iPdShw = iPD; NOREF(iPdShw);
2658# endif
2659 {
2660 SHWPDE PdeDst = *pPDEDst;
2661 if (PdeDst.n.u1Present)
2662 {
2663 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2664 RTGCPHYS GCPhys;
2665 if ( !PdeSrc.b.u1Size
2666 || !(cr4 & X86_CR4_PSE))
2667 {
2668 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2669# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2670 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2671 GCPhys |= i * (PAGE_SIZE / 2);
2672# endif
2673 }
2674 else
2675 {
2676 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2677# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2678                        /* Select the right PDE as we're emulating a 4MB page directory entry with two 2MB shadow PDEs. */
2679 GCPhys |= i * X86_PAGE_2M_SIZE;
2680# endif
2681 }
2682
2683 if ( pShwPage->GCPhys == GCPhys
2684 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
2685 && ( pShwPage->fCached
2686 || ( !fGlobal
2687 && ( false
2688# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
2689 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2690 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
2691 || ( !pShwPage->fSeenNonGlobal
2692 && (cr4 & X86_CR4_PGE))
2693# endif
2694 )
2695 )
2696 )
2697 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
2698 || ( (cr4 & X86_CR4_PSE)
2699 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
2700 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
2701 )
2702 )
2703 {
2704# ifdef VBOX_WITH_STATISTICS
2705 if ( !fGlobal
2706 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2707 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
2708 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
2709 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
2710 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
2711 else
2712 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
2713# endif /* VBOX_WITH_STATISTICS */
2714 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
2715 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
2716 //# ifdef PGMPOOL_WITH_CACHE
2717 // pgmPoolCacheUsed(pPool, pShwPage);
2718 //# endif
2719 }
2720 else
2721 {
2722 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
2723 pPDEDst->u = 0;
2724 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
2725 }
2726 }
2727 else
2728 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
2729 pPDEDst++;
2730 }
2731 }
2732 else if (iPD != iPdNoMapping)
2733 {
2734 /*
2735 * Check if there is any page directory to mark not present here.
2736 */
2737# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2738 const unsigned iPdShw = iPD; NOREF(iPdShw);
2739# else
2740 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2741# endif
2742 {
2743 if (pPDEDst->n.u1Present)
2744 {
2745 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
2746 pPDEDst->u = 0;
2747 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
2748 }
2749 pPDEDst++;
2750 }
2751 }
2752 else
2753 {
2754# if PGM_GST_TYPE == PGM_TYPE_32BIT
2755 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2756 const unsigned cPTs = pMapping->cPTs;
2757 if (pVM->pgm.s.fMappingsFixed)
2758 {
2759 /* It's fixed, just skip the mapping. */
2760 pMapping = pMapping->CTXALLSUFF(pNext);
2761 iPdNoMapping = pMapping ? pMapping->GCPtr >> X86_PD_SHIFT : ~0U;
2762 }
2763 else
2764 {
2765 /*
2766 * Check for conflicts for subsequent pagetables
2767 * and advance to the next mapping.
2768 */
2769 iPdNoMapping = ~0U;
2770 unsigned iPT = cPTs;
2771 while (iPT-- > 1)
2772 {
2773 if ( pPDSrc->a[iPD + iPT].n.u1Present
2774 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
2775 {
2776# ifdef IN_RING3
2777 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD);
2778 if (VBOX_FAILURE(rc))
2779 return rc;
2780
2781 /*
2782 * Update iPdNoMapping and pMapping.
2783 */
2784 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2785 while (pMapping && pMapping->GCPtr < (iPD << X86_PD_SHIFT))
2786 pMapping = pMapping->CTXALLSUFF(pNext);
2787 iPdNoMapping = pMapping ? pMapping->GCPtr >> X86_PD_SHIFT : ~0U;
2788 break;
2789# else
2790 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2791 return VINF_PGM_SYNC_CR3;
2792# endif
2793 }
2794 }
2795 if (iPdNoMapping == ~0U && pMapping)
2796 {
2797 pMapping = pMapping->CTXALLSUFF(pNext);
2798 if (pMapping)
2799 iPdNoMapping = pMapping->GCPtr >> X86_PD_SHIFT;
2800 }
2801 }
2802
2803 /* advance. */
2804 iPD += cPTs - 1;
2805 pPDEDst += cPTs + (PGM_SHW_TYPE != PGM_TYPE_32BIT) * cPTs;
2806# else /* PGM_GST_TYPE == PGM_TYPE_32BIT */
2807 /* PAE and AMD64 modes are hardware accelerated only, so there are no mappings. */
2808 AssertFailed();
2809# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
2810 }
2811
2812 } /* for iPD */
2813 } /* for each PDPTE (PAE) */
2814
2815 return VINF_SUCCESS;
2816
2817#elif PGM_GST_TYPE == PGM_TYPE_AMD64
2818# if PGM_SHW_TYPE == PGM_TYPE_AMD64
2819//# error not implemented
2820 return VERR_INTERNAL_ERROR;
2821
2822# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2823# error "Guest AMD64 mode, but not the shadow mode - that can't be right!"
2824# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2825
2826#else /* guest real and protected mode */
2827 return VINF_SUCCESS;
2828#endif
2829}
2830
2831
2832
2833
2834#ifdef VBOX_STRICT
2835#ifdef IN_GC
2836# undef AssertMsgFailed
2837# define AssertMsgFailed Log
2838#endif
2839#ifdef IN_RING3
2840# include <VBox/dbgf.h>
2841
2842/**
2843 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
2844 *
2845 * @returns VBox status code (VINF_SUCCESS).
2846 * @param pVM The VM handle.
2847 * @param cr3 The root of the hierarchy.
2848 * @param cr4 The cr4, only PAE and PSE are currently used.
2849 * @param fLongMode Set if long mode, false if not long mode.
2850 * @param cMaxDepth Number of levels to dump.
2851 * @param pHlp Pointer to the output functions.
2852 */
2853__BEGIN_DECLS
2854PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
2855__END_DECLS
2856
2857#endif
2858
2859/**
2860 * Checks that the shadow page table is in sync with the guest one.
2861 *
2862 * @returns The number of errors.
2863 * @param pVM The virtual machine.
2864 * @param cr3 Guest context CR3 register
2865 * @param cr4 Guest context CR4 register
2866 * @param GCPtr Where to start. Defaults to 0.
2867 * @param cb How much to check. Defaults to everything.
2868 */
2869PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint32_t cr3, uint32_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
2870{
2871 unsigned cErrors = 0;
2872
2873#if PGM_GST_TYPE == PGM_TYPE_32BIT
2874 PPGM pPGM = &pVM->pgm.s;
2875 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
2876 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
2877 RTHCPHYS HCPhys; /* general usage. */
2878 int rc;
2879
2880 /*
2881 * Check that the Guest CR3 and all its mappings are correct.
2882 */
2883 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
2884 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
2885 false);
2886 rc = PGMShwGetPage(pVM, pPGM->pGuestPDGC, NULL, &HCPhysShw);
2887 AssertRCReturn(rc, 1);
2888 HCPhys = NIL_RTHCPHYS;
2889 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
2890 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhyswShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
2891# ifdef IN_RING3
2892 RTGCPHYS GCPhys;
2893 rc = PGMR3DbgHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
2894 AssertRCReturn(rc, 1);
2895 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
2896# endif
2897 const X86PD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
2898
2899 /*
2900 * Get and check the Shadow CR3.
2901 */
2902# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2903 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
2904 unsigned cPDEs = ELEMENTS(pPDDst->a);
2905# else
2906 const X86PDPAE *pPDDst = pPGM->CTXMID(ap,PaePDs[0]); /* use it as a 2048 entry PD */
2907 unsigned cPDEs = ELEMENTS(pPDDst->a) * ELEMENTS(pPGM->apHCPaePDs);
2908# endif
2909 if (cb != ~(RTGCUINTPTR)0)
2910 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
2911
2912/** @todo call the other two PGMAssert*() functions. */
2913
2914 /*
2915 * Iterate the shadow page directory.
2916 */
2917 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
2918 unsigned iPDDst = GCPtr >> SHW_PD_SHIFT;
2919 cPDEs += iPDDst;
2920 for (;
2921 iPDDst < cPDEs;
2922 iPDDst++, GCPtr += _4G / cPDEs)
2923 {
2924 const SHWPDE PdeDst = pPDDst->a[iPDDst];
2925 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2926 {
2927 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2928 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
2929 {
2930 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
2931 cErrors++;
2932 continue;
2933 }
2934 }
2935 else if ( (PdeDst.u & X86_PDE_P)
2936 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2937 )
2938 {
2939 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
2940 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
2941 if (!pPoolPage)
2942 {
2943 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
2944 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
2945 cErrors++;
2946 continue;
2947 }
2948 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
2949
2950 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
2951 {
2952 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
2953 GCPtr, (uint64_t)PdeDst.u));
2954 cErrors++;
2955 }
2956
2957 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
2958 {
2959 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
2960 GCPtr, (uint64_t)PdeDst.u));
2961 cErrors++;
2962 }
2963
2964 const X86PDE PdeSrc = pPDSrc->a[iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)];
2965 if (!PdeSrc.n.u1Present)
2966 {
2967 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
2968 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
2969 cErrors++;
2970 continue;
2971 }
2972
2973 if ( !PdeSrc.b.u1Size
2974 || !(cr4 & X86_CR4_PSE))
2975 {
2976 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
2977# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2978 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
2979# endif
2980 }
2981 else
2982 {
2983 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
2984 {
2985 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
2986 GCPtr, (uint64_t)PdeSrc.u));
2987 cErrors++;
2988 continue;
2989 }
2990 GCPhysGst = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2991# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2992 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
2993# endif
2994 }
2995
2996 if ( pPoolPage->enmKind
2997 != (!PdeSrc.b.u1Size || !(cr4 & X86_CR4_PSE) ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
2998 {
2999 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3000 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3001 cErrors++;
3002 }
3003
3004 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3005 if (!pPhysPage)
3006 {
3007 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3008 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3009 cErrors++;
3010 continue;
3011 }
3012
3013 if (GCPhysGst != pPoolPage->GCPhys)
3014 {
3015 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3016 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3017 cErrors++;
3018 continue;
3019 }
3020
3021 if ( !PdeSrc.b.u1Size
3022 || !(cr4 & X86_CR4_PSE))
3023 {
3024 /*
3025 * Page Table.
3026 */
3027 const GSTPT *pPTSrc;
3028 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3029 if (VBOX_FAILURE(rc))
3030 {
3031 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3032 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3033 cErrors++;
3034 continue;
3035 }
3036 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3037 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3038 {
3039 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3040 // (This problem will go away when/if we shadow multiple CR3s.)
3041 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3042 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3043 cErrors++;
3044 continue;
3045 }
3046 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3047 {
3048 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3049 GCPtr, (uint64_t)PdeDst.u));
3050 cErrors++;
3051 continue;
3052 }
3053
3054 /* iterate the page table. */
3055# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3056 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3057 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3058# else
3059 const unsigned offPTSrc = 0;
3060# endif
3061 for (unsigned iPT = 0, off = 0;
3062 iPT < ELEMENTS(pPTDst->a);
3063 iPT++, off += PAGE_SIZE)
3064 {
3065 const SHWPTE PteDst = pPTDst->a[iPT];
3066
3067 /* skip not-present entries. */
3068 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3069 continue;
3070 Assert(PteDst.n.u1Present);
3071
3072 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3073 if (!PteSrc.n.u1Present)
3074 {
3075#ifdef IN_RING3
3076 PGMAssertHandlerAndFlagsInSync(pVM);
3077 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3078#endif
3079 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3080 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3081 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3082 cErrors++;
3083 continue;
3084 }
3085
3086 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3087# if 1 /** @todo sync accessed bit properly... */
3088 fIgnoreFlags |= X86_PTE_A;
3089# endif
3090
3091 /* match the physical addresses */
3092 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3093 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3094
3095# ifdef IN_RING3
3096 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3097 if (VBOX_FAILURE(rc))
3098 {
3099 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3100 {
3101 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3102 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3103 cErrors++;
3104 continue;
3105 }
3106 }
3107 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3108 {
3109 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3110 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3111 cErrors++;
3112 continue;
3113 }
3114# endif
3115
3116 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3117 if (!pPhysPage)
3118 {
3119# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3120 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3121 {
3122 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3123 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3124 cErrors++;
3125 continue;
3126 }
3127# endif
3128 if (PteDst.n.u1Write)
3129 {
3130 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3131 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3132 cErrors++;
3133 }
3134 fIgnoreFlags |= X86_PTE_RW;
3135 }
3136 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
3137 {
3138 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3139 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3140 cErrors++;
3141 continue;
3142 }
3143
3144 /* flags */
3145 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3146 {
3147 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3148 {
3149 if (PteDst.n.u1Write)
3150 {
3151                        AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3152 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3153 cErrors++;
3154 continue;
3155 }
3156 fIgnoreFlags |= X86_PTE_RW;
3157 }
3158 else
3159 {
3160 if (PteDst.n.u1Present)
3161 {
3162 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3163 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3164 cErrors++;
3165 continue;
3166 }
3167 fIgnoreFlags |= X86_PTE_P;
3168 }
3169 }
3170 else
3171 {
3172 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3173 {
3174 if (PteDst.n.u1Write)
3175 {
3176 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3177 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3178 cErrors++;
3179 continue;
3180 }
3181 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3182 {
3183 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3184 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3185 cErrors++;
3186 continue;
3187 }
3188 if (PteDst.n.u1Dirty)
3189 {
3190 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3191 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3192 cErrors++;
3193 }
3194# if 0 /** @todo sync access bit properly... */
3195 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3196 {
3197                    AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3198 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3199 cErrors++;
3200 }
3201 fIgnoreFlags |= X86_PTE_RW;
3202# else
3203 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3204# endif
3205 }
3206 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3207 {
3208 /* access bit emulation (not implemented). */
3209 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3210 {
3211 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3212 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3213 cErrors++;
3214 continue;
3215 }
3216 if (!PteDst.n.u1Accessed)
3217 {
3218                    AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3219 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3220 cErrors++;
3221 }
3222 fIgnoreFlags |= X86_PTE_P;
3223 }
3224# ifdef DEBUG_sandervl
3225 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3226# endif
3227 }
3228
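                    /* The second comparison additionally tolerates RW being clear in the shadow
                       PTE while set in the guest PTE, i.e. a write-protected shadow entry (see
                       the "lazy phys handler dereg." note in the big page variant below). */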
3229 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3230 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3231 )
3232 {
3233 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3234 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3235 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3236 cErrors++;
3237 continue;
3238 }
3239 } /* foreach PTE */
3240 }
3241 else
3242 {
3243 /*
3244 * Big Page.
3245 */
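                /*
                 * A guest 2/4 MB page is shadowed by an ordinary shadow page table whose entries
                 * all point into the big page. The dirty/accessed handling is therefore checked
                 * on the PDE pair first, and the physical addresses and flags are verified per
                 * shadow PTE in the loop further down.
                 */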
3246 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | X86_PDE_PAE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3247 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3248 {
3249 if (PdeDst.n.u1Write)
3250 {
3251 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3252 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3253 cErrors++;
3254 continue;
3255 }
3256 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3257 {
3258                    AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3259 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3260 cErrors++;
3261 continue;
3262 }
3263# if 0 /** @todo sync access bit properly... */
3264 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3265 {
3266                    AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3267 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3268 cErrors++;
3269 }
3270 fIgnoreFlags |= X86_PTE_RW;
3271# else
3272 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3273# endif
3274 }
3275 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3276 {
3277 /* access bit emulation (not implemented). */
3278 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3279 {
3280 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3281 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3282 cErrors++;
3283 continue;
3284 }
3285 if (!PdeDst.n.u1Accessed)
3286 {
3287                    AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3288 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3289 cErrors++;
3290 }
3291 fIgnoreFlags |= X86_PTE_P;
3292 }
3293
3294 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3295 {
3296 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3297 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3298 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3299 cErrors++;
3300 }
3301
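                /* Each shadow PTE covers one 4K slice of the big page; GCPhysGst is advanced by
                   PAGE_SIZE per iteration so the physical match keeps in step. */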
3302 /* iterate the page table. */
3303 for (unsigned iPT = 0, off = 0;
3304 iPT < ELEMENTS(pPTDst->a);
3305 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3306 {
3307 const SHWPTE PteDst = pPTDst->a[iPT];
3308
3309 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3310 {
3311 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3312 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3313 cErrors++;
3314 }
3315
3316 /* skip not-present entries. */
3317 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3318 continue;
3319
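                    /* The ignore mask is rebuilt for every PTE since the handler checks below may
                       add RW or P to it. */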
3320 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3321
3322 /* match the physical addresses */
3323 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3324
3325# ifdef IN_RING3
3326 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3327 if (VBOX_FAILURE(rc))
3328 {
3329 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3330 {
3331 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3332 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3333 cErrors++;
3334 }
3335 }
3336 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3337 {
3338 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3339 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3340 cErrors++;
3341 continue;
3342 }
3343# endif
3344
3345 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3346 if (!pPhysPage)
3347 {
3348# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3349 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3350 {
3351 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3352 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3353 cErrors++;
3354 continue;
3355 }
3356# endif
3357 if (PteDst.n.u1Write)
3358 {
3359 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3360 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3361 cErrors++;
3362 }
3363 fIgnoreFlags |= X86_PTE_RW;
3364 }
3365 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
3366 {
3367 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3368 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3369 cErrors++;
3370 continue;
3371 }
3372
3373 /* flags */
3374 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3375 {
3376 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3377 {
3378 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
3379 {
3380 if (PteDst.n.u1Write)
3381 {
3382                            AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3383 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3384 cErrors++;
3385 continue;
3386 }
3387 fIgnoreFlags |= X86_PTE_RW;
3388 }
3389 }
3390 else
3391 {
3392 if (PteDst.n.u1Present)
3393 {
3394                        AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3395 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3396 cErrors++;
3397 continue;
3398 }
3399 fIgnoreFlags |= X86_PTE_P;
3400 }
3401 }
3402
3403 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3404 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
3405 )
3406 {
3407 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
3408 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3409 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3410 cErrors++;
3411 continue;
3412 }
3413 } /* foreach PTE */
3414 }
3415 }
3416 /* not present */
3417
3418    } /* foreach PDE */
3419
3420# ifdef DEBUG
3421 if (cErrors)
3422 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
3423# endif
3424
3425#elif PGM_GST_TYPE == PGM_TYPE_PAE
3426//# error not implemented
3427
3428
3429#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3430//# error not implemented
3431
3432/*#else: guest real and protected mode */
3433#endif
3434 return cErrors;
3435}
3436#endif /* VBOX_STRICT */
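/*
 * Note: AssertCR3 returns the number of inconsistencies found (cErrors); a non-zero result
 * means the shadow page tables have drifted out of sync with the guest's. The sketch below
 * is illustrative only: it assumes a ring-3 strict-build wrapper (here called PGMAssertCR3)
 * that dispatches to this template's AssertCR3 for the active paging mode.
 */
#if 0 /* illustrative sketch, not built */
    /* Hypothetical call site verifying shadow/guest consistency after a sync: */
    unsigned cErrs = PGMAssertCR3(pVM, CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM));
    AssertMsg(!cErrs, ("Shadow paging out of sync: %u error(s)\n", cErrs));
#endif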
3437