VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@4668

Last change on this file since 4668 was 4665, checked in by vboxsync, 17 years ago

Moved some of the odd address conversion routines to PGMR3Dbg just to get them out of the way.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 149.4 KB
1/* $Id: PGMAllBth.h 4665 2007-09-10 13:41:18Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
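/*
 * This is a paging template: it is included once per guest/shadow paging mode
 * combination, with PGM_GST_TYPE and PGM_SHW_TYPE selecting the modes and the
 * PGM_BTH_DECL / PGM_BTH_NAME macros generating mode-specific function names.
 */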
7
8/*
9 * Copyright (C) 2006-2007 innotek GmbH
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License as published by the Free Software Foundation,
15 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
16 * distribution. VirtualBox OSE is distributed in the hope that it will
17 * be useful, but WITHOUT ANY WARRANTY of any kind.
18 */
19
20/*******************************************************************************
21* Internal Functions *
22*******************************************************************************/
23__BEGIN_DECLS
24PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
25PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
26PGM_BTH_DECL(int, SyncPage)(PVM pVM, VBOXPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
27PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PVBOXPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
28PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PVBOXPD pPDSrc, RTGCUINTPTR GCPtrPage);
29PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
30PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
31PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint32_t cr0, uint32_t cr3, uint32_t cr4, bool fGlobal);
32#ifdef VBOX_STRICT
33PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint32_t cr3, uint32_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
34#endif
35#ifdef PGMPOOL_WITH_USER_TRACKING
36DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
37#endif
38__END_DECLS
39
40
41/**
42 * #PF Handler for raw-mode guest execution.
43 *
44 * @returns VBox status code (appropriate for trap handling and GC return).
45 * @param pVM VM Handle.
46 * @param uErr The trap error code.
47 * @param pRegFrame Trap register frame.
48 * @param pvFault The fault address.
49 */
50PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
51{
52#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) && PGM_SHW_TYPE != PGM_TYPE_AMD64
53
54# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
55# error "32-bit guest mode is only implemented for 32-bit and PAE shadow modes."
56# endif
57
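    /*
     * Note: uErr is the x86 page fault error code: bit 0 (P) clear means the page was
     * not present, bit 1 (RW) means a write access, bit 2 (US) means a user-mode access,
     * and bit 4 (ID) means an instruction fetch (set only when NX is enabled).
     */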
58# if PGM_SHW_TYPE == PGM_TYPE_PAE
59 /*
60 * Hide the instruction fetch trap indicator for now.
61 */
62 /** @todo NXE will change this and we must fix NXE in the switcher too! */
63 if (uErr & X86_TRAP_PF_ID)
64 {
65 uErr &= ~X86_TRAP_PF_ID;
66 TRPMSetErrorCode(pVM, uErr);
67 }
68# endif
69
70 /*
71 * Get PDs.
72 */
73 int rc;
74# if PGM_WITH_PAGING(PGM_GST_TYPE)
75 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
76 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
77# else
78 PVBOXPD pPDSrc = NULL;
79 const unsigned iPDSrc = 0;
80# endif
81
82 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
83# if PGM_SHW_TYPE == PGM_TYPE_32BIT
84 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
85# else /* PAE */
86 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries. */
87# endif
88
89# if PGM_WITH_PAGING(PGM_GST_TYPE)
90 /* Determine current privilege level */
91 uint32_t cpl = CPUMGetGuestCPL(pVM, pRegFrame);
92
93# ifdef PGM_SYNC_DIRTY_BIT
94 /*
95 * If we successfully correct the write protection fault due to dirty bit
96 * tracking, or this page fault is a genuine one, then return immediately.
97 */
98 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
99 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
100 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
101 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
102 || rc == VINF_EM_RAW_GUEST_TRAP)
103 {
104 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
105 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
106 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
107 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
108 }
109# endif
110
111 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
112# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
113
114 /*
115 * A common case is the not-present error caused by lazy page table syncing.
116 *
117 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
118 * so we can safely assume that the shadow PT is present when calling SyncPage later.
119 *
120 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
121 * of mapping conflict and defer to SyncCR3 in R3.
122 * (Again, we do NOT support access handlers for non-present guest pages.)
123 *
124 */
125# if PGM_WITH_PAGING(PGM_GST_TYPE)
126 VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
127# else
128 VBOXPDE PdeSrc;
129 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
130 PdeSrc.n.u1Present = 1;
131 PdeSrc.n.u1Write = 1;
132 PdeSrc.n.u1Accessed = 1;
133 PdeSrc.n.u1User = 1;
134# endif
135 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
136 && !pPDDst->a[iPDDst].n.u1Present
137 && PdeSrc.n.u1Present
138 )
139
140 {
141 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
142 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
143 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
144 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
145 if (VBOX_SUCCESS(rc))
146 {
147 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
148 return rc;
149 }
150 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
151 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
152 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
153 return VINF_PGM_SYNC_CR3;
154 }
155
156# if PGM_WITH_PAGING(PGM_GST_TYPE)
157 /*
158 * Check if this address is within any of our mappings.
159 *
160 * This is *very* fast and it's gonna save us a bit of effort below and prevent
161 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
162 * (BTW, it's impossible to have physical access handlers in a mapping.)
163 */
164 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
165 {
166 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
167 PPGMMAPPING pMapping = CTXALLSUFF(pVM->pgm.s.pMappings);
168 for ( ; pMapping; pMapping = CTXALLSUFF(pMapping->pNext))
169 {
170 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
171 break;
172 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
173 {
174 /*
175 * The first thing we check is if we've got an undetected conflict.
176 */
177 if (!pVM->pgm.s.fMappingsFixed)
178 {
179 unsigned iPT = pMapping->cPTs;
180 while (iPT-- > 0)
181 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
182 {
183 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
184 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
185 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
186 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
187 return VINF_PGM_SYNC_CR3;
188 }
189 }
190
191 /*
192 * Check if the fault address is in a virtual page access handler range.
193 */
194 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
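    /* Invoke the handler for write accesses; for other accesses only when the
       handler is neither write-only nor a hypervisor handler. */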
195 if ( pCur
196 && pCur->enmType != PGMVIRTHANDLERTYPE_EIP
197 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
198 && ( uErr & X86_TRAP_PF_RW
199 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
200 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) ) /** r=bird: <- this is probably wrong. */
201 {
202# ifdef IN_GC
203 STAM_PROFILE_START(&pCur->Stat, h);
204 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
205 STAM_PROFILE_STOP(&pCur->Stat, h);
206# else
207 AssertFailed();
208 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
209# endif
210 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
211 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
212 return rc;
213 }
214
215 /*
216 * Check if the EIP is in a virtual page access handler range.
217 */
218 if (cpl == 0)
219 {
220 RTGCPTR pvEIP;
221 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &pvEIP);
222 if (VBOX_SUCCESS(rc))
223 {
224 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvEIP);
225 if ( pCur
226 && pCur->enmType == PGMVIRTHANDLERTYPE_EIP
227 && (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
228 {
229# ifdef IN_GC
230 STAM_PROFILE_START(&pCur->Stat, h);
231 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr);
232 STAM_PROFILE_STOP(&pCur->Stat, h);
233# else
234 AssertFailed();
235 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
236# endif
237 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
238 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
239 return rc;
240 }
241 }
242 }
243
244 /*
245 * Pretend we're not here and let the guest handle the trap.
246 */
247 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
248 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
249 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
250 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
251 return VINF_EM_RAW_GUEST_TRAP;
252 }
253 }
254 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
255 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
256# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
257
258 /*
259 * Check if this fault address is flagged for special treatment,
260 * which means we'll have to figure out the physical address and
261 * check flags associated with it.
262 *
263 * ASSUME that we can limit any special access handling to pages
264 * in page tables which the guest believes to be present.
265 */
266 if (PdeSrc.n.u1Present)
267 {
268 RTGCPHYS GCPhys = ~0U;
269
270# if PGM_WITH_PAGING(PGM_GST_TYPE)
271 uint32_t cr4 = CPUMGetGuestCR4(pVM);
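    /* For a 4MB guest page (PSE enabled) compute the GC physical address of the 4KB
       page containing the fault straight from the PDE; otherwise walk the guest PT. */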
272 if ( PdeSrc.b.u1Size
273 && (cr4 & X86_CR4_PSE))
274 GCPhys = (PdeSrc.u & X86_PDE4M_PG_MASK)
275 | ((RTGCPHYS)pvFault & (PAGE_OFFSET_MASK_BIG ^ PAGE_OFFSET_MASK));
276 else
277 {
278 PVBOXPT pPTSrc;
279# ifdef IN_GC
280 rc = PGMGCDynMapGCPage(pVM, PdeSrc.u & X86_PDE_PG_MASK, (void **)&pPTSrc);
281# else
282 pPTSrc = (PVBOXPT)MMPhysGCPhys2HCVirt(pVM, PdeSrc.u & X86_PDE_PG_MASK, sizeof(*pPTSrc));
283 if (pPTSrc == 0)
284 rc = VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS;
285# endif
286 if (VBOX_SUCCESS(rc))
287 {
288 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> PAGE_SHIFT) & PTE_MASK;
289 if (pPTSrc->a[iPTESrc].n.u1Present)
290 GCPhys = pPTSrc->a[iPTESrc].u & X86_PTE_PG_MASK;
291 }
292 }
293# else
294 /* No paging so the fault address is the physical address */
295 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
296# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
297
298 /*
299 * If we have a GC address we'll check if it has any flags set.
300 */
301 if (GCPhys != ~0U)
302 {
303 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
304
305 PPGMPAGE pPage;
306 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
307 if (VBOX_SUCCESS(rc))
308 {
309 if (pPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_HANDLER | MM_RAM_FLAGS_VIRTUAL_HANDLER)) /** @todo PAGE FLAGS */
310 {
311 if (pPage->HCPhys & MM_RAM_FLAGS_PHYSICAL_HANDLER) /** @todo PAGE FLAGS */
312 {
313 /*
314 * Physical page access handler.
315 */
316 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
317 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
318 if (pCur)
319 {
320# ifdef PGM_SYNC_N_PAGES
321 /*
322 * If the region is write protected and we got a page not present fault, then sync
323 * the pages. If the fault was caused by a read, then restart the instruction.
324 * In case of write access continue to the GC write handler.
325 *
326 * ASSUMES that there is only one handler per page or that they have similar write properties.
327 */
328 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
329 && !(uErr & X86_TRAP_PF_P))
330 {
331 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
332 if ( VBOX_FAILURE(rc)
333 || !(uErr & X86_TRAP_PF_RW)
334 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
335 {
336 AssertRC(rc);
337 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
338 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
339 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
340 return rc;
341 }
342 }
343# endif
344
345 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
346 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
347 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
348
349#if defined(IN_GC) || defined(IN_RING0)
350 if (CTXALLSUFF(pCur->pfnHandler))
351 {
352 STAM_PROFILE_START(&pCur->Stat, h);
353 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
354 STAM_PROFILE_STOP(&pCur->Stat, h);
355 }
356 else
357#endif
358 rc = VINF_EM_RAW_EMULATE_INSTR;
359 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
360 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
361 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
362 return rc;
363 }
364 }
365# if PGM_WITH_PAGING(PGM_GST_TYPE)
366 else
367 {
368# ifdef PGM_SYNC_N_PAGES
369 /*
370 * If the region is write protected and we got a page not present fault, then sync
371 * the pages. If the fault was caused by a read, then restart the instruction.
372 * In case of write access continue to the GC write handler.
373 */
374 if ( (pPage->HCPhys & (MM_RAM_FLAGS_VIRTUAL_WRITE | MM_RAM_FLAGS_VIRTUAL_ALL)) == MM_RAM_FLAGS_VIRTUAL_WRITE /** @todo PAGE FLAGS */
375 && !(uErr & X86_TRAP_PF_P))
376 {
377 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
378 if ( VBOX_FAILURE(rc)
379 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
380 || !(uErr & X86_TRAP_PF_RW))
381 {
382 AssertRC(rc);
383 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
384 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
385 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
386 return rc;
387 }
388 }
389# endif
390 /*
391 * Ok, it's a virtual page access handler.
392 *
393 * Since it's faster to search by address, we'll do that first
394 * and then retry by GCPhys if that fails.
395 */
396 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
397 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
398 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
399 */
400 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
401 if (pCur)
402 {
403 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
404 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
405 || !(uErr & X86_TRAP_PF_P)
406 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
407 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
408
409 if ( pCur->enmType != PGMVIRTHANDLERTYPE_EIP
410 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
411 && ( uErr & X86_TRAP_PF_RW
412 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
413 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) ) /** @todo r=bird: _HYPERVISOR is impossible here because of mapping check. */
414 {
415# ifdef IN_GC
416 STAM_PROFILE_START(&pCur->Stat, h);
417 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
418 STAM_PROFILE_STOP(&pCur->Stat, h);
419# else
420 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
421# endif
422 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
423 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
424 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
425 return rc;
426 }
427 /* Unhandled part of a monitored page */
428 }
429 else
430 {
431 /* Check by physical address. */
432 PPGMVIRTHANDLER pCur;
433 unsigned iPage;
434 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
435 &pCur, &iPage);
436 Assert(VBOX_SUCCESS(rc) || !pCur);
437 if ( pCur
438 && pCur->enmType != PGMVIRTHANDLERTYPE_EIP
439 && ( uErr & X86_TRAP_PF_RW
440 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
441 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) )
442 {
443 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
444# ifdef IN_GC
445 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
446 Assert(off < pCur->cb);
447 STAM_PROFILE_START(&pCur->Stat, h);
448 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
449 STAM_PROFILE_STOP(&pCur->Stat, h);
450# else
451 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
452# endif
453 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
454 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
455 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
456 return rc;
457 }
458 }
459 }
460# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
461
462 /*
463 * There is a handled area of the page, but this fault doesn't belong to it.
464 * We must emulate the instruction.
465 *
466 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
467 * we first check if this was a page-not-present fault for a page with only
468 * write access handlers. Restart the instruction if it wasn't a write access.
469 */
470 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
471
472 if ( !(pPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL)) /** @todo PAGE FLAGS */
473 && !(uErr & X86_TRAP_PF_P))
474 {
475 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
476 if ( VBOX_FAILURE(rc)
477 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
478 || !(uErr & X86_TRAP_PF_RW))
479 {
480 AssertRC(rc);
481 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
482 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
483 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
484 return rc;
485 }
486 }
487
488 /** @todo This particular case can cause quite a lot of overhead, e.g. during the early stages of kernel booting in Ubuntu 6.06,
489 * which writes to an unhandled part of the LDT page several million times.
490 */
491 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
492 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%VHp%s%s\n",
493 rc, pPage->HCPhys, pPage->HCPhys & MM_RAM_FLAGS_PHYSICAL_HANDLER ? " phys" : "",
494 pPage->HCPhys & MM_RAM_FLAGS_VIRTUAL_HANDLER ? " virt" : ""));
495 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
496 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
497 return rc;
498 } /* if any kind of handler */
499
500# if PGM_WITH_PAGING(PGM_GST_TYPE)
501 if (uErr & X86_TRAP_PF_P)
502 {
503 /*
504 * The page isn't marked, but it might still be monitored by a virtual page access handler.
505 * (ASSUMES no temporary disabling of virtual handlers.)
506 */
507 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
508 * we should correct both the shadow page table and physical memory flags, and not only check for
509 * accesses within the handler region but for access to pages with virtual handlers. */
510 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
511 if (pCur)
512 {
513 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
514 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
515 || !(uErr & X86_TRAP_PF_P)
516 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
517 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
518
519 if ( pCur->enmType != PGMVIRTHANDLERTYPE_EIP
520 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
521 && ( uErr & X86_TRAP_PF_RW
522 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
523 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) ) /** @todo r=bird: _HYPERVISOR is impossible here because of mapping check. */
524 {
525# ifdef IN_GC
526 STAM_PROFILE_START(&pCur->Stat, h);
527 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
528 STAM_PROFILE_STOP(&pCur->Stat, h);
529# else
530 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
531# endif
532 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
533 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
534 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
535 return rc;
536 }
537 }
538 }
539# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
540 }
541 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
542
543# ifdef PGM_OUT_OF_SYNC_IN_GC
544 /*
545 * We are here only if the page is present in the guest page tables and the trap is not
546 * handled by our handlers.
547 * Check it for a page out-of-sync situation.
548 */
549 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
550
551 if (!(uErr & X86_TRAP_PF_P))
552 {
553 /*
554 * Page is not present in our page tables.
555 * Try to sync it!
556 * BTW, fPageShw is invalid in this branch!
557 */
558 if (uErr & X86_TRAP_PF_US)
559 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
560 else /* supervisor */
561 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
562
563# if defined(LOG_ENABLED) && !defined(IN_RING0)
564 RTGCPHYS GCPhys;
565 uint64_t fPageGst;
566 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
567 Log(("Page out of sync: %p eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
568 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)));
569# endif /* LOG_ENABLED */
570
571# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
572 if (cpl == 0)
573 {
574 uint64_t fPageGst;
575 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
576 if ( VBOX_SUCCESS(rc)
577 && !(fPageGst & X86_PTE_US))
578 {
579 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
580 if ( pvFault == (RTGCPTR)pRegFrame->eip
581 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
582# ifdef CSAM_DETECT_NEW_CODE_PAGES
583 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
584 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)) /* any new code we encounter here */
585# endif /* CSAM_DETECT_NEW_CODE_PAGES */
586 )
587 {
588 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
589 rc = CSAMExecFault(pVM, (RTGCPTR)pRegFrame->eip);
590 if (rc != VINF_SUCCESS)
591 {
592 /*
593 * CSAM needs to perform a job in ring 3.
594 *
595 * Sync the page before going to the host context; otherwise we'll end up in a loop if
596 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
597 */
598 LogFlow(("CSAM ring 3 job\n"));
599 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
600 AssertRC(rc2);
601
602 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
603 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
604 return rc;
605 }
606 }
607# ifdef CSAM_DETECT_NEW_CODE_PAGES
608 else
609 if ( uErr == X86_TRAP_PF_RW
610 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
611 && pRegFrame->ecx < 0x10000
612 )
613 {
614 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
615 * to detect loading of new code pages.
616 */
617
618 /*
619 * Decode the instruction.
620 */
621 RTGCPTR PC;
622 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
623 if (rc == VINF_SUCCESS)
624 {
625 DISCPUSTATE Cpu;
626 uint32_t cbOp;
627 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
628
629 /* For now we'll restrict this to rep movsw/d instructions */
630 if ( rc == VINF_SUCCESS
631 && Cpu.pCurInstr->opcode == OP_MOVSWD
632 && (Cpu.prefix & PREFIX_REP))
633 {
634 CSAMMarkPossibleCodePage(pVM, pvFault);
635 }
636 }
637 }
638# endif /* CSAM_DETECT_NEW_CODE_PAGES */
639
640 /*
641 * Mark this page as safe.
642 */
643 /** @todo not correct for pages that contain both code and data!! */
644 Log2(("CSAMMarkPage %p; scanned=%d\n", pvFault, true));
645 CSAMMarkPage(pVM, pvFault, true);
646 }
647 }
648# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
649 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
650 if (VBOX_SUCCESS(rc))
651 {
652 /* The page was successfully synced, return to the guest. */
653 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
654 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
655 return VINF_SUCCESS;
656 }
657 }
658 else
659 {
660 /*
661 * A side effect of not flushing global PDEs is out-of-sync pages due
662 * to physically monitored regions that are no longer valid.
663 * Assume for now that it only applies to the read/write flag.
664 */
665 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
666 {
667 if (uErr & X86_TRAP_PF_US)
668 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
669 else /* supervisor */
670 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
671
672
673 /*
674 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
675 */
676 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
677 if (VBOX_SUCCESS(rc))
678 {
679 /*
680 * Page was successfully synced, return to guest.
681 */
682# ifdef VBOX_STRICT
683 RTGCPHYS GCPhys;
684 uint64_t fPageGst;
685 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
686 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
687 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
688
689 uint64_t fPageShw;
690 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
691 Assert(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW);
692# endif /* VBOX_STRICT */
693 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
694 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
695 return VINF_SUCCESS;
696 }
697 }
698
699# if PGM_WITH_PAGING(PGM_GST_TYPE)
700# ifdef VBOX_STRICT
701 /*
702 * Check for VMM page flags vs. Guest page flags consistency.
703 * Currently only for debug purposes.
704 */
705 if (VBOX_SUCCESS(rc))
706 {
707 /* Get guest page flags. */
708 uint64_t fPageGst;
709 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
710 if (VBOX_SUCCESS(rc))
711 {
712 uint64_t fPageShw;
713 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
714
715 /*
716 * Compare page flags.
717 * Note: we have AVL, A, D bits desynched.
718 */
719 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
720 ("Page flags mismatch! pvFault=%p GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
721 }
722 else
723 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
724 }
725 else
726 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
727# endif /* VBOX_STRICT */
728# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
729 }
730 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
731# endif /* PGM_OUT_OF_SYNC_IN_GC */
732 }
733 else
734 {
735 /*
736 * Page not present in Guest OS or invalid page table address.
737 * This is potential virtual page access handler food.
738 *
739 * For the present we'll say that our access handlers don't
740 * work for this case - we've already discarded the page table
741 * not present case which is identical to this.
742 *
743 * When we perchance find we need this, we will probably have AVL
744 * trees (offset based) to operate on and we can measure their speed
745 * against mapping a page table and probably rearrange this handling
746 * a bit. (Like, searching virtual ranges before checking the
747 * physical address.)
748 */
749 }
750 }
751
752
753# if PGM_WITH_PAGING(PGM_GST_TYPE)
754 /*
755 * Check if it's in a EIP based virtual page access handler range.
756 * This is only used for supervisor pages in flat mode.
757 */
758 /** @todo this stuff is completely broken by the out-of-sync stuff. since we don't use this stuff, that's not really a problem yet. */
759 STAM_PROFILE_START(&pVM->pgm.s.StatEIPHandlers, d);
760 if (cpl == 0)
761 {
762 RTGCPTR pvEIP;
763 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &pvEIP);
764 if ( VBOX_SUCCESS(rc)
765 && pvEIP == (RTGCPTR)pRegFrame->eip)
766 {
767 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvEIP);
768 if ( pCur
769 && pCur->enmType == PGMVIRTHANDLERTYPE_EIP
770 && (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
771 {
772 LogFlow(("EIP handler\n"));
773# ifdef IN_GC
774 STAM_PROFILE_START(&pCur->Stat, h);
775 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr);
776 STAM_PROFILE_STOP(&pCur->Stat, h);
777# else
778 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
779# endif
780 STAM_PROFILE_STOP(&pVM->pgm.s.StatEIPHandlers, d);
781 return rc;
782 }
783 }
784 }
785 STAM_PROFILE_STOP(&pVM->pgm.s.StatEIPHandlers, d);
786
787 /*
788 * Conclusion, this is a guest trap.
789 */
790 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
791 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
792 return VINF_EM_RAW_GUEST_TRAP;
793# else
794 /* present, but not a monitored page; perhaps the guest is probing physical memory */
795 return VINF_EM_RAW_EMULATE_INSTR;
796# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
797
798
799#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
800
801 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
802 return VERR_INTERNAL_ERROR;
803#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
804}
805
806
807/**
808 * Emulation of the invlpg instruction.
809 *
810 *
811 * @returns VBox status code.
812 *
813 * @param pVM VM handle.
814 * @param GCPtrPage Page to invalidate.
815 *
816 * @remark ASSUMES that the guest is updating before invalidating. This order
817 * isn't required by the CPU, so this is speculative and could cause
818 * trouble.
819 *
820 * @todo Flush page or page directory only if necessary!
821 * @todo Add a #define for simply invalidating the page.
822 */
823PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
824{
825#if PGM_GST_TYPE == PGM_TYPE_32BIT
826
827 LogFlow(("InvalidatePage %x\n", GCPtrPage));
828# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
829 /*
830 * Get the shadow PD entry and skip out if this PD isn't present.
831 * (Guessing that it is frequent for a shadow PDE to not be present, we do this first.)
832 */
833 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
834# if PGM_SHW_TYPE == PGM_TYPE_32BIT
835 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
836# else
837 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
838# endif
839 const SHWPDE PdeDst = *pPdeDst;
840 if (!PdeDst.n.u1Present)
841 {
842 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
843 return VINF_SUCCESS;
844 }
845
846 /*
847 * Get the guest PD entry and calc big page.
848 */
849 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
850 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
851 VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
852 const uint32_t cr4 = CPUMGetGuestCR4(pVM);
853 const bool fIsBigPage = PdeSrc.b.u1Size && (cr4 & X86_CR4_PSE);
854
855# ifdef IN_RING3
856 /*
857 * If a CR3 Sync is pending we may ignore the invalidate page operation
858 * depending on the kind of sync and whether it's a global page or not.
859 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
860 */
861# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
862 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
863 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
864 && fIsBigPage
865 && PdeSrc.b.u1Global
866 && (cr4 & X86_CR4_PGE)
867 )
868 )
869# else
870 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
871# endif
872 {
873 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
874 return VINF_SUCCESS;
875 }
876# endif /* IN_RING3 */
877
878
879 /*
880 * Deal with the Guest PDE.
881 */
882 int rc = VINF_SUCCESS;
883 if (PdeSrc.n.u1Present)
884 {
885 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
886 {
887 /*
888 * Conflict - Let SyncPT deal with it to avoid duplicate code.
889 */
890 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
891 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
892 }
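    /* Resync if the user/supervisor bit differs or the shadow PDE is writable
       while the guest PDE is not. */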
893 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
894 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
895 {
896 /*
897 * Mark not present so we can resync the PDE when it's used.
898 */
899 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
900 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
901 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
902 pPdeDst->u = 0;
903 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
904 PGM_INVL_GUEST_TLBS();
905 }
906# ifdef PGM_SYNC_ACCESSED_BIT
907 else if (!PdeSrc.n.u1Accessed)
908 {
909 /*
910 * Mark not present so we can set the accessed bit.
911 */
912 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
913 pPdeDst->u = 0;
914 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
915 PGM_INVL_GUEST_TLBS();
916 }
917# endif
918 else if (!fIsBigPage)
919 {
920 /*
921 * 4KB - page.
922 */
923 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
924 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
925# if PGM_SHW_TYPE != PGM_TYPE_32BIT
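    /* With a PAE shadow each 4KB guest page table is shadowed by two 512-entry tables;
       bit 0 of iPDDst selects which 2KB half of the guest PT this shadow page covers. */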
926 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
927# endif
928 if (pShwPage->GCPhys == GCPhys)
929 {
930#if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
931 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
932 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
933 if (pPT->a[iPTEDst].n.u1Present)
934 {
935# ifdef PGMPOOL_WITH_USER_TRACKING
936 /* This is very unlikely with caching/monitoring enabled. */
937 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
938# endif
939 pPT->a[iPTEDst].u = 0;
940 }
941#else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
942 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
943 if (VBOX_SUCCESS(rc))
944 rc = VINF_SUCCESS;
945#endif
946 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
947 PGM_INVL_PG(GCPtrPage);
948 }
949 else
950 {
951 /*
952 * The page table address changed.
953 */
954 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
955 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
956 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
957 pPdeDst->u = 0;
958 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
959 PGM_INVL_GUEST_TLBS();
960 }
961 }
962 else
963 {
964 /*
965 * 4MB - page.
966 */
967 /* Before freeing the page, check if anything really changed. */
968 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
969 RTGCPHYS GCPhys = PdeSrc.u & X86_PDE4M_PG_MASK;
970# if PGM_SHW_TYPE != PGM_TYPE_32BIT
971 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
972# endif
973 if ( pShwPage->GCPhys == GCPhys
974 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
975 {
976 /* ASSUMES the given bits are identical for 4M and normal PDEs */
977 /** @todo PAT */
978# ifdef PGM_SYNC_DIRTY_BIT
979 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
980 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
981 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
982 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
983# else
984 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
985 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD)))
986# endif
987 {
988 LogFlow(("Skipping flush for big page containing %VGv (PD=%X)-> nothing has changed!\n", GCPtrPage, iPDSrc));
989 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
990 return VINF_SUCCESS;
991 }
992 }
993
994 /*
995 * Ok, the page table is present and it's been changed in the guest.
996 * If we're in host context, we'll just mark it as not present taking the lazy approach.
997 * We could do this for some flushes in GC too, but we need an algorithm for
998 * deciding which 4MB pages contain code that is likely to be executed very soon.
999 */
1000 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1001 pPdeDst->u = 0;
1002 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
1003 DUMP_PDE_BIG("PGMInvalidatePage", iPDSrc, PdeSrc);
1004 PGM_INVL_BIG_PG(GCPtrPage);
1005 }
1006 }
1007 else
1008 {
1009 /*
1010 * Page directory is not present, mark shadow PDE not present.
1011 */
1012 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1013 {
1014 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1015 pPdeDst->u = 0;
1016 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1017 PGM_INVL_PG(GCPtrPage);
1018 }
1019 else
1020 {
1021 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1022 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
1023 }
1024 }
1025
1026 return rc;
1027
1028# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1029# error "Guest 32-bit mode and shadow AMD64 mode doesn't add up!"
1030# endif
1031 return VINF_SUCCESS;
1032
1033#elif PGM_GST_TYPE == PGM_TYPE_PAE
1034# if PGM_SHW_TYPE == PGM_TYPE_PAE
1035//# error not implemented
1036 return VERR_INTERNAL_ERROR;
1037
1038# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1039# error "Guest PAE mode, but not the shadow mode ; 32bit - maybe, but amd64 no."
1040# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1041
1042#elif PGM_GST_TYPE == PGM_TYPE_AMD64
1043# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1044//# error not implemented
1045 return VERR_INTERNAL_ERROR;
1046
1047# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1048# error "Guest AMD64 mode, but not the shadow mode - that can't be right!"
1049# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1050
1051#else /* guest real and protected mode */
1052 /* There's no such thing when paging is disabled. */
1053 return VINF_SUCCESS;
1054#endif
1055}
1056
1057
1058#ifdef PGMPOOL_WITH_USER_TRACKING
1059/**
1060 * Update the tracking of shadowed pages.
1061 *
1062 * @param pVM The VM handle.
1063 * @param pShwPage The shadow page.
1064 * @param HCPhys The physical page that is being dereferenced.
1065 */
1066DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1067{
1068# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1069 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1070 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1071
1072 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1073 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1074 * 2. write protect all shadowed pages. I.e. implement caching.
1075 */
1076 /*
1077 * Find the guest address.
1078 */
1079 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTXSUFF(pRamRanges);
1080 pRam;
1081 pRam = pRam->CTXSUFF(pNext))
1082 {
1083 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1084 while (iPage-- > 0)
1085 {
1086 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1087 {
1088 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1089 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1090 pShwPage->cPresent--;
1091 pPool->cPresent--;
1092 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1093 return;
1094 }
1095 }
1096 }
1097
1098 for (;;)
1099 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1100# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1101 pShwPage->cPresent--;
1102 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1103# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1104}
1105
1106
1107/**
1108 * Update the tracking of shadowed pages.
1109 *
1110 * @param pVM The VM handle.
1111 * @param pShwPage The shadow page.
1112 * @param u16 The top 16 bits of pPage->HCPhys.
1113 * @param pPage Pointer to the guest page. This will be modified.
1114 * @param iPTDst The index into the shadow table.
1115 */
1116DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1117{
1118# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1119 /*
1120 * We're making certain assumptions about the placement of cRef and idx.
1121 */
1122 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1123 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1124
1125 /*
1126 * Just deal with the simple first time here.
1127 */
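    /* u16 == 0 means there are no shadow references yet: encode cRefs=1 and this shadow
       page's pool index directly; otherwise let pgmPoolTrackPhysExtAddref add the extra
       reference. */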
1128 if (!u16)
1129 {
1130 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1131 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1132 }
1133 else
1134 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1135
1136 /* write back, trying to be clever... */
1137 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%VHp->%VHp iPTDst=%#x\n",
1138 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1139 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1140# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1141
1142 /* update statistics. */
1143 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1144 pShwPage->cPresent++;
1145 if (pShwPage->iFirstPresent > iPTDst)
1146 pShwPage->iFirstPresent = iPTDst;
1147}
1148#endif /* PGMPOOL_WITH_USER_TRACKING */
1149
1150
1151/**
1152 * Creates a 4K shadow page for a guest page.
1153 *
1154 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1155 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1156 * will be mapped in this function.
1157 *
1158 * @param pVM VM handle.
1159 * @param pPteDst Destination page table entry.
1160 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1161 * Can safely assume that only the flags are being used.
1162 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1163 * @param pShwPage Pointer to the shadow page.
1164 * @param iPTDst The index into the shadow table.
1165 *
1166 * @remark Not used for 2/4MB pages!
1167 */
1168DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, VBOXPDE PdeSrc, VBOXPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1169{
1170 if (PteSrc.n.u1Present)
1171 {
1172 /*
1173 * Find the ram range.
1174 */
1175 PPGMPAGE pPage;
1176 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & X86_PTE_PG_MASK, &pPage);
1177 if (VBOX_SUCCESS(rc))
1178 {
1179 /** @todo investigate PWT, PCD and PAT. */
1180 /*
1181 * Make page table entry.
1182 */
1183 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1184 SHWPTE PteDst;
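    /* Pages with an 'all access' handler get a not-present shadow PTE so every access
       traps; pages with only write handlers are mapped read-only. */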
1185 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
1186 {
1187 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1188 if (!(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL)))
1189 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1190 | (HCPhys & X86_PTE_PAE_PG_MASK);
1191 else
1192 PteDst.u = 0;
1193 /** @todo count these two kinds. */
1194 }
1195 else
1196 {
1197#ifdef PGM_SYNC_DIRTY_BIT
1198# ifdef PGM_SYNC_ACCESSED_BIT
1199 /*
1200 * If the page or page directory entry is not marked accessed,
1201 * we mark the page not present.
1202 */
1203 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1204 {
1205 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1206 PteDst.u = 0;
1207 }
1208 else
1209# endif
1210 /*
1211 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1212 * when the page is modified.
1213 */
1214 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1215 {
1216 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1217 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1218 | (HCPhys & X86_PTE_PAE_PG_MASK)
1219 | PGM_PTFLAGS_TRACK_DIRTY;
1220 }
1221 else
1222 {
1223 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1224 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1225 | (HCPhys & X86_PTE_PAE_PG_MASK);
1226 }
1227#endif
1228 }
1229
1230#ifdef PGMPOOL_WITH_USER_TRACKING
1231 /*
1232 * Keep user track up to date.
1233 */
1234 if (PteDst.n.u1Present)
1235 {
1236 if (!pPteDst->n.u1Present)
1237 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1238 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1239 {
1240 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1241 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1242 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1243 }
1244 }
1245 else if (pPteDst->n.u1Present)
1246 {
1247 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1248 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1249 }
1250#endif /* PGMPOOL_WITH_USER_TRACKING */
1251
1252 /*
1253 * Update statistics and commit the entry.
1254 */
1255 if (!PteSrc.n.u1Global)
1256 pShwPage->fSeenNonGlobal = true;
1257 *pPteDst = PteDst;
1258 }
1259 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1260 /** @todo count these. */
1261 }
1262 else
1263 {
1264 /*
1265 * Page not-present.
1266 */
1267#ifdef PGMPOOL_WITH_USER_TRACKING
1268 /* Keep user track up to date. */
1269 if (pPteDst->n.u1Present)
1270 {
1271 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1272 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1273 }
1274#endif /* PGMPOOL_WITH_USER_TRACKING */
1275 pPteDst->u = 0;
1276 /** @todo count these. */
1277 }
1278}
1279
1280
1281/**
1282 * Syncs a guest OS page.
1283 *
1284 * There are no conflicts at this point, neither is there any need for
1285 * page table allocations.
1286 *
1287 * @returns VBox status code.
1288 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1289 * @param pVM VM handle.
1290 * @param PdeSrc Page directory entry of the guest.
1291 * @param GCPtrPage Guest context page address.
1292 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1293 * @param uErr Fault error (X86_TRAP_PF_*).
1294 */
1295PGM_BTH_DECL(int, SyncPage)(PVM pVM, VBOXPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1296{
1297 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1298
1299#if PGM_GST_TYPE == PGM_TYPE_32BIT
1300
1301# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
1302# error "Invalid shadow mode for 32-bit guest mode!"
1303# endif
1304
1305 /*
1306 * Assert preconditions.
1307 */
1308# if GC_ARCH_BITS != 32
1309 Assert(GCPtrPage < _4G); //???
1310# endif
1311 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> X86_PD_SHIFT) & X86_PD_MASK]);
1312 Assert(PdeSrc.n.u1Present);
1313 Assert(cPages);
1314
1315 /*
1316 * Get the shadow PDE, find the shadow page table in the pool.
1317 */
1318 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1319# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1320 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1321# else /* PAE */
1322 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1323# endif
1324 Assert(PdeDst.n.u1Present);
1325 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1326
1327 /*
1328 * Check that the page is present and that the shadow PDE isn't out of sync.
1329 */
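    /* A guest PDE maps a 4MB page only when its PS bit is set and PSE is enabled in CR4. */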
1330 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1331 RTGCPHYS GCPhys;
1332 if (!fBigPage)
1333 {
1334 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1335# if PGM_SHW_TYPE != PGM_TYPE_32BIT
1336 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1337# endif
1338 }
1339 else
1340 {
1341 GCPhys = PdeSrc.u & GST_PDE4M_PG_MASK;
1342# if PGM_SHW_TYPE != PGM_TYPE_32BIT
1343 GCPhys |= GCPtrPage & X86_PAGE_2M_SIZE;
1344# endif
1345 }
1346 if ( pShwPage->GCPhys == GCPhys
1347 && PdeSrc.n.u1Present
1348 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1349 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1350 )
1351 {
1352# ifdef PGM_SYNC_ACCESSED_BIT
1353 /*
1354 * Check that the PDE is marked accessed already.
1355 * Since we set the accessed bit *before* getting here on a #PF, this
1356 * check is only meant for dealing with non-#PF'ing paths.
1357 */
1358 if (PdeSrc.n.u1Accessed)
1359# endif
1360 {
1361 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1362 if (!fBigPage)
1363 {
1364 /*
1365 * 4KB Page - Map the guest page table.
1366 */
1367 PVBOXPT pPTSrc;
1368 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & X86_PDE_PG_MASK, &pPTSrc);
1369 if (VBOX_SUCCESS(rc))
1370 {
1371# ifdef PGM_SYNC_N_PAGES
1372 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1373 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1374 {
1375 /*
1376 * This code path is currently only taken when the caller is PGMTrap0eHandler
1377 * for non-present pages!
1378 *
1379 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1380 * deal with locality.
1381 */
1382 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1383# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1384 const unsigned offPTSrc = 0;
1385# else
1386 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1387# endif
1388 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
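    /* Center the sync window on the faulting entry, clamping it to the bounds of the
       shadow page table. */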
1389 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1390 iPTDst = 0;
1391 else
1392 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1393 for (; iPTDst < iPTDstEnd; iPTDst++)
1394 {
1395 if (!pPTDst->a[iPTDst].n.u1Present)
1396 {
1397 VBOXPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1398 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(X86_PT_MASK << X86_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1399 NOREF(GCPtrCurPage);
1400#ifndef IN_RING0
1401 /*
1402 * Assuming kernel code will be marked as supervisor - and not as user level
1403 * and executed using a conforming code selector - and marked as read-only.
1404 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1405 */
1406 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1407 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1408 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)GCPtrCurPage)
1409 || pgmRamTestFlags(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK,
1410 MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)
1411 )
1412#endif
1413 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1414 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1415 GCPtrCurPage, PteSrc.n.u1Present,
1416 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1417 PteSrc.n.u1User & PdeSrc.n.u1User,
1418 (uint64_t)PteSrc.u,
1419 (uint64_t)pPTDst->a[iPTDst].u,
1420 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1421 }
1422 }
1423 }
1424 else
1425# endif /* PGM_SYNC_N_PAGES */
1426 {
1427 const unsigned iPTSrc = (GCPtrPage >> X86_PT_SHIFT) & X86_PT_MASK;
1428 VBOXPTE PteSrc = pPTSrc->a[iPTSrc];
1429 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1430 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1431 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1432 GCPtrPage, PteSrc.n.u1Present,
1433 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1434 PteSrc.n.u1User & PdeSrc.n.u1User,
1435 (uint64_t)PteSrc.u,
1436 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1437 }
1438 }
1439 else /* MMIO or invalid page: emulated in #PF handler. */
1440 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1441 }
1442 else
1443 {
1444 /*
1445 * 4/2MB page - lazy syncing shadow 4K pages.
1446 * (There are many causes of getting here, it's no longer only CSAM.)
1447 */
1448 /* Calculate the GC physical address of this 4KB shadow page. */
1449 RTGCPHYS GCPhys = (PdeSrc.u & X86_PDE4M_PAE_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & PAGE_OFFSET_MASK_BIG);
1450 /* Find ram range. */
1451 PPGMPAGE pPage;
1452 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1453 if (VBOX_SUCCESS(rc))
1454 {
1455 /*
1456 * Make shadow PTE entry.
1457 */
1458 RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1459 SHWPTE PteDst;
1460 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1461 | (HCPhys & X86_PTE_PAE_PG_MASK);
1462 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
1463 {
1464 if (!(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL)))
1465 PteDst.n.u1Write = 0;
1466 else
1467 PteDst.u = 0;
1468 }
1469 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1470# ifdef PGMPOOL_WITH_USER_TRACKING
1471 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1472 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1473# endif
1474 pPTDst->a[iPTDst] = PteDst;
1475
1476
1477# ifdef PGM_SYNC_DIRTY_BIT
1478 /*
1479 * If the page is not flagged as dirty and is writable, then make it read-only
1480 * at PD level, so we can set the dirty bit when the page is modified.
1481 *
1482 * ASSUMES that page access handlers are implemented on page table entry level.
1483 * Thus we will first catch the dirty access and set PDE.D and restart. If
1484 * there is an access handler, we'll trap again and let it work on the problem.
1485 */
1486 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1487 * As for invlpg, it simply frees the whole shadow PT.
1488 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1489 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1490 {
1491 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1492 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1493 PdeDst.n.u1Write = 0;
1494 }
1495 else
1496 {
1497 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1498 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1499 }
1500# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1501 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1502# else /* PAE */
1503 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1504# endif
1505# endif /* PGM_SYNC_DIRTY_BIT */
1506 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1507 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1508 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1509 }
1510 }
1511 return VINF_SUCCESS;
1512 }
1513# ifdef PGM_SYNC_ACCESSED_BIT
1514 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1515#endif
1516 }
1517 else
1518 {
1519 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1520 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1521 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1522 }
1523
1524 /*
1525 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1526 * Yea, I'm lazy.
1527 */
1528 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1529# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1530 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1531# else /* PAE */
1532 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1533# endif
1534 PGM_INVL_GUEST_TLBS();
1535 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1536
1537#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
1538
1539# ifdef PGM_SYNC_N_PAGES
1540 /*
1541 * Get the shadow PDE, find the shadow page table in the pool.
1542 */
1543 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1544# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1545 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1546# else /* PAE */
1547 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1548# endif
1549 Assert(PdeDst.n.u1Present);
1550 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1551 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1552
1553# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1554 const unsigned offPTSrc = 0;
1555# else
1556 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1557# endif
1558
1559 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1560 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1561 {
1562 /*
1563 * This code path is currently only taken when the caller is PGMTrap0eHandler
1564 * for non-present pages!
1565 *
1566 * We sync PGM_SYNC_NR_PAGES pages around the faulting page to take
1567 * advantage of locality.
1568 */
1569 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1570 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1571 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1572 iPTDst = 0;
1573 else
1574 iPTDst -= PGM_SYNC_NR_PAGES / 2;
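            /* Editorial example (not in the original source), assuming PGM_SYNC_NR_PAGES is 8:
             * a fault on shadow PT index 2 would sync indexes 0..5, while a fault on index 100
             * would sync indexes 96..103 (the window is clamped to the page table bounds). */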
1575 for (; iPTDst < iPTDstEnd; iPTDst++)
1576 {
1577 if (!pPTDst->a[iPTDst].n.u1Present)
1578 {
1579 VBOXPTE PteSrc;
1580
1581 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(X86_PT_MASK << X86_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1582
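                /* Editorial note (not in the original source): in real and protected mode the
                 * guest runs without paging, so guest physical == guest virtual. The source PTE
                 * is therefore synthesized below as a present, writable, accessed and dirty
                 * user mapping of the current page address. */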
1583 /* Fake the page table entry */
1584 PteSrc.u = GCPtrCurPage;
1585 PteSrc.n.u1Present = 1;
1586 PteSrc.n.u1Dirty = 1;
1587 PteSrc.n.u1Accessed = 1;
1588 PteSrc.n.u1Write = 1;
1589 PteSrc.n.u1User = 1;
1590
1591 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1592
1593 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1594 GCPtrCurPage, PteSrc.n.u1Present,
1595 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1596 PteSrc.n.u1User & PdeSrc.n.u1User,
1597 (uint64_t)PteSrc.u,
1598 (uint64_t)pPTDst->a[iPTDst].u,
1599 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1600 }
1601 }
1602 }
1603 else
1604# endif /* PGM_SYNC_N_PAGES */
1605 {
1606 VBOXPTE PteSrc;
1607 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1608 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(X86_PT_MASK << X86_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1609
1610 /* Fake the page table entry */
1611 PteSrc.u = GCPtrCurPage;
1612 PteSrc.n.u1Present = 1;
1613 PteSrc.n.u1Dirty = 1;
1614 PteSrc.n.u1Accessed = 1;
1615 PteSrc.n.u1Write = 1;
1616 PteSrc.n.u1User = 1;
1617 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1618
1619 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1620 GCPtrPage, PteSrc.n.u1Present,
1621 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1622 PteSrc.n.u1User & PdeSrc.n.u1User,
1623 (uint64_t)PteSrc.u,
1624 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1625 }
1626 return VINF_SUCCESS;
1627
1628#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1629 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1630 return VERR_INTERNAL_ERROR;
1631#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1632}
1633
1634
1635
1636#if PGM_WITH_PAGING(PGM_GST_TYPE)
1637
1638# ifdef PGM_SYNC_DIRTY_BIT
1639
1640/**
1641 * Investigate page fault and handle write protection page faults caused by
1642 * dirty bit tracking.
1643 *
1644 * @returns VBox status code.
1645 * @param pVM VM handle.
1646 * @param uErr Page fault error code.
1647 * @param pPdeDst Shadow page directory entry.
1648 * @param pPdeSrc Guest page directory entry.
1649 * @param GCPtrPage Guest context page address.
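 * @retval  VINF_EM_RAW_GUEST_TRAP if this is a genuine guest page fault.
 * @retval  VINF_PGM_HANDLED_DIRTY_BIT_FAULT if a dirty bit tracking fault was handled.
 * @retval  VINF_PGM_NO_DIRTY_BIT_TRACKING if dirty bit tracking is not involved.
 * @retval  VINF_SUCCESS when bailing out on a write into the hypervisor area.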
1650 */
1651PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PVBOXPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1652{
1653 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1654 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1655
1656 /*
1657 * Real page fault?
1658 */
1659 if ( (uErr & X86_TRAP_PF_RSVD)
1660 || !pPdeSrc->n.u1Present
1661 || ((uErr & X86_TRAP_PF_RW) && !pPdeSrc->n.u1Write)
1662 || ((uErr & X86_TRAP_PF_US) && !pPdeSrc->n.u1User) )
1663 {
1664# ifdef IN_GC
1665 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1666# endif
1667 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1668 LogFlow(("CheckPageFault: real page fault at %VGv (1)\n", GCPtrPage));
1669
1670 if (pPdeSrc->n.u1Present)
1671 {
1672 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1673 * See the 2nd case below as well.
1674 */
1675 if (pPdeSrc->b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE))
1676 {
1677 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1678 }
1679 else
1680 {
1681 /*
1682 * Map the guest page table.
1683 */
1684 PVBOXPT pPTSrc;
1685 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & X86_PDE_PG_MASK, &pPTSrc);
1686 if (VBOX_SUCCESS(rc))
1687 {
1688 PVBOXPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> PAGE_SHIFT) & PTE_MASK];
1689 const VBOXPTE PteSrc = *pPteSrc;
1690 if (pPteSrc->n.u1Present)
1691 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1692 }
1693 AssertRC(rc);
1694 }
1695 }
1696 return VINF_EM_RAW_GUEST_TRAP;
1697 }
1698
1699 /*
1700 * First check the easy case where the page directory has been marked read-only to track
1701 * the dirty bit of an emulated BIG page
1702 */
1703 if (pPdeSrc->b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE))
1704 {
1705 /* Mark guest page directory as accessed */
1706 pPdeSrc->b.u1Accessed = 1;
1707
1708 /*
1709 * Only write protection page faults are relevant here.
1710 */
1711 if (uErr & X86_TRAP_PF_RW)
1712 {
1713 /* Mark guest page directory as dirty (BIG page only). */
1714 pPdeSrc->b.u1Dirty = 1;
1715
1716 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1717 {
1718 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1719
1720 Assert(pPdeSrc->b.u1Write);
1721
1722 pPdeDst->n.u1Write = 1;
1723 pPdeDst->n.u1Accessed = 1;
1724 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1725 PGM_INVL_BIG_PG(GCPtrPage);
1726 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1727 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1728 }
1729 }
1730 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1731 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1732 }
1733 /* else: 4KB page table */
1734
1735 /*
1736 * Map the guest page table.
1737 */
1738 PVBOXPT pPTSrc;
1739 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & X86_PDE_PG_MASK, &pPTSrc);
1740 if (VBOX_SUCCESS(rc))
1741 {
1742 /*
1743 * Real page fault?
1744 */
1745 PVBOXPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> PAGE_SHIFT) & PTE_MASK];
1746 const VBOXPTE PteSrc = *pPteSrc;
1747 if ( !PteSrc.n.u1Present
1748 || ((uErr & X86_TRAP_PF_RW) && !PteSrc.n.u1Write)
1749 || ((uErr & X86_TRAP_PF_US) && !PteSrc.n.u1User)
1750 )
1751 {
1752# ifdef IN_GC
1753 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1754# endif
1755 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1756 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
1757
1758 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1759 * See the 2nd case above as well.
1760 */
1761 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
1762 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1763
1764 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1765 return VINF_EM_RAW_GUEST_TRAP;
1766 }
1767 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
1768
1769 /*
1770 * Set the accessed bits in the page directory and the page table.
1771 */
1772 pPdeSrc->n.u1Accessed = 1;
1773 pPteSrc->n.u1Accessed = 1;
1774
1775 /*
1776 * Only write protection page faults are relevant here.
1777 */
1778 if (uErr & X86_TRAP_PF_RW)
1779 {
1780 /* Write access, so mark guest entry as dirty. */
1781# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
1782 if (!pPteSrc->n.u1Dirty)
1783 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
1784 else
1785 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
1786# endif
1787 pPteSrc->n.u1Dirty = 1;
1788
1789 if (pPdeDst->n.u1Present)
1790 {
1791 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
1792 * Our individual shadow handlers will provide more information and force a fatal exit.
1793 */
1794 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
1795 {
1796 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
1797 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1798 return VINF_SUCCESS;
1799 }
1800
1801 /*
1802 * Map shadow page table.
1803 */
1804 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1805 if (pShwPage)
1806 {
1807 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1808 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1809 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
1810 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
1811 {
1812 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
1813# ifdef VBOX_STRICT
1814 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & X86_PTE_PG_MASK);
1815 if (pPage)
1816 AssertMsg(!(pPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)), /** @todo PAGE FLAGS */
1817 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
1818# endif
1819 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1820
1821 Assert(pPteSrc->n.u1Write);
1822
1823 pPteDst->n.u1Write = 1;
1824 pPteDst->n.u1Dirty = 1;
1825 pPteDst->n.u1Accessed = 1;
1826 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
1827 PGM_INVL_PG(GCPtrPage);
1828
1829 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1830 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1831 }
1832 }
1833 else
1834 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
1835 }
1836 }
1837/** @todo Optimize accessed bit emulation? */
1838# ifdef VBOX_STRICT
1839 /*
1840 * Sanity check.
1841 */
1842 else if ( !pPteSrc->n.u1Dirty
1843 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
1844 && pPdeDst->n.u1Present)
1845 {
1846 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1847 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1848 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1849 if ( pPteDst->n.u1Present
1850 && pPteDst->n.u1Write)
1851 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
1852 }
1853# endif /* VBOX_STRICT */
1854 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1855 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1856 }
1857 AssertRC(rc);
1858 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1859 return rc;
1860}
1861
1862# endif
1863
1864#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
1865
1866
1867/**
1868 * Sync a shadow page table.
1869 *
1870 * The shadow page table is not present. This includes the case where
1871 * there is a conflict with a mapping.
1872 *
1873 * @returns VBox status code.
1874 * @param pVM VM handle.
1875 * @param iPDSrc Page directory index.
1876 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
1877 * Assume this is a temporary mapping.
1878 * @param GCPtrPage GC Pointer of the page that caused the fault
1879 */
1880PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PVBOXPD pPDSrc, RTGCUINTPTR GCPtrPage)
1881{
1882 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1883 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
1884 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
1885
1886#if PGM_GST_TYPE == PGM_TYPE_32BIT
1887
1888# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
1889# error "Invalid shadow mode for 32-bit guest mode!"
1890# endif
1891
1892 /*
1893 * Validate input a little bit.
1894 */
1895 Assert(iPDSrc == (GCPtrPage >> GST_PD_SHIFT));
1896# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1897 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
1898# else
1899 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
1900# endif
1901 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1902 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
1903 SHWPDE PdeDst = *pPdeDst;
1904
1905 /*
1906 * Check for conflicts.
1907 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
1908 * HC: Simply resolve the conflict.
1909 */
1910 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1911 {
1912 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1913# ifndef IN_RING3
1914 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
1915 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1916 return VERR_ADDRESS_CONFLICT;
1917# else
1918 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
1919 Assert(pMapping);
1920 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPDSrc);
1921 if (VBOX_FAILURE(rc))
1922 {
1923 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1924 return rc;
1925 }
1926 PdeDst = *pPdeDst;
1927# endif
1928 }
1929 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
1930
1931 /*
1932 * Sync page directory entry.
1933 */
1934 int rc = VINF_SUCCESS;
1935 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1936 if (PdeSrc.n.u1Present)
1937 {
1938 /*
1939 * Allocate & map the page table.
1940 */
1941 PSHWPT pPTDst;
1942 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1943 PPGMPOOLPAGE pShwPage;
1944 RTGCPHYS GCPhys;
1945 if (fPageTable)
1946 {
1947 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1948# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1949 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1950# endif
1951 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
1952 }
1953 else
1954 {
1955 GCPhys = PdeSrc.u & GST_PDE4M_PG_MASK;
1956# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1957 GCPhys |= GCPtrPage & BIT(X86_PAGE_2M_SHIFT);
1958# endif
1959 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
1960 }
1961 if (rc == VINF_SUCCESS)
1962 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1963 else if (rc == VINF_PGM_CACHED_PAGE)
1964 {
1965 /*
1966 * The PT was cached, just hook it up.
1967 */
1968 if (fPageTable)
1969 PdeDst.u = pShwPage->Core.Key
1970 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1971 else
1972 {
1973 PdeDst.u = pShwPage->Core.Key
1974 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1975# ifdef PGM_SYNC_DIRTY_BIT /* (see explanation and assumptions further down.) */
1976 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1977 {
1978 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1979 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1980 PdeDst.b.u1Write = 0;
1981 }
1982# endif
1983 }
1984 *pPdeDst = PdeDst;
1985 return VINF_SUCCESS;
1986 }
1987 else if (rc == VERR_PGM_POOL_FLUSHED)
1988 return VINF_PGM_SYNC_CR3;
1989 else
1990 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
1991 PdeDst.u &= X86_PDE_AVL_MASK;
1992 PdeDst.u |= pShwPage->Core.Key;
1993
1994# ifdef PGM_SYNC_DIRTY_BIT
1995 /*
1996 * Page directory has been accessed (this is a fault situation, remember).
1997 */
1998 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
1999# endif
2000 if (fPageTable)
2001 {
2002 /*
2003 * Page table - 4KB.
2004 *
2005 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2006 */
2007 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2008 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2009 PGSTPT pPTSrc;
2010 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2011 if (VBOX_SUCCESS(rc))
2012 {
2013 /*
2014 * Start by syncing the page directory entry so CSAM's TLB trick works.
2015 */
2016 PdeDst.u = (PdeDst.u & (X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK))
2017 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2018 *pPdeDst = PdeDst;
2019
2020 /*
2021 * Directory/page user or supervisor privilege: (same goes for read/write)
2022 *
2023 * Directory Page Combined
2024 * U/S U/S U/S
2025 * 0 0 0
2026 * 0 1 0
2027 * 1 0 0
2028 * 1 1 1
2029 *
2030 * Simple AND operation. Table listed for completeness.
2031 *
2032 */
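# if 0 /* Editorial sketch, not part of the original source: the table above amounts to a plain
        * AND of the directory and page level bits, which is the combination the Log2 statements
        * below print for the shadow PTE. The variable names here are illustrative only. */
                unsigned const fCombinedWrite = PdeSrc.n.u1Write & PteSrc.n.u1Write;
                unsigned const fCombinedUser  = PdeSrc.n.u1User  & PteSrc.n.u1User;
# endif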
2033 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
2034# ifdef PGM_SYNC_N_PAGES
2035 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2036 unsigned iPTDst = iPTBase;
2037 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
2038 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2039 iPTDst = 0;
2040 else
2041 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2042# else /* !PGM_SYNC_N_PAGES */
2043 unsigned iPTDst = 0;
2044 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2045# endif /* !PGM_SYNC_N_PAGES */
2046# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2047 const unsigned offPTSrc = 0;
2048# else
2049 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2050# endif
2051 for (; iPTDst < iPTDstEnd; iPTDst++)
2052 {
2053 const unsigned iPTSrc = iPTDst + offPTSrc;
2054 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2055
2056 if (PteSrc.n.u1Present) /* we've already cleared it above */
2057 {
2058#ifndef IN_RING0
2059 /*
2060 * Assuming kernel code will be marked as supervisor - and not as user level
2061 * and executed using a conforming code selector - and marked as read-only.
2062 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2063 */
2064 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2065 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2066 || pgmRamTestFlags(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK,
2067 MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)
2068 )
2069#endif
2070 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2071 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2072 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2073 PteSrc.n.u1Present,
2074 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2075 PteSrc.n.u1User & PdeSrc.n.u1User,
2076 (uint64_t)PteSrc.u,
2077 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2078 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2079 }
2080 } /* for PTEs */
2081 }
2082 }
2083 else
2084 {
2085 /*
2086 * Big page - 2/4MB.
2087 *
2088 * We'll walk the ram range list in parallel and optimize lookups.
2089 * We will only sync one shadow page table at a time.
2090 */
2091 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2092
2093 /**
2094 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2095 */
2096
2097 /*
2098 * Start by syncing the page directory entry.
2099 */
2100 PdeDst.u = (PdeDst.u & (X86_PDE_PAE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2101 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2102
2103# ifdef PGM_SYNC_DIRTY_BIT
2104 /*
2105 * If the page is not flagged as dirty and is writable, then make it read-only
2106 * at PD level, so we can set the dirty bit when the page is modified.
2107 *
2108 * ASSUMES that page access handlers are implemented on page table entry level.
2109 * Thus we will first catch the dirty access and set PDE.D and restart. If
2110 * there is an access handler, we'll trap again and let it work on the problem.
2111 */
2112 /** @todo move the above stuff to a section in the PGM documentation. */
2113 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2114 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2115 {
2116 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2117 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2118 PdeDst.b.u1Write = 0;
2119 }
2120# endif /* PGM_SYNC_DIRTY_BIT */
2121 *pPdeDst = PdeDst;
2122
2123 /*
2124 * Fill the shadow page table.
2125 */
2126 /* Get address and flags from the source PDE. */
2127 SHWPTE PteDstBase;
2128 PteDstBase.u = PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2129
2130 /* Loop thru the entries in the shadow PT. */
2131 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2132 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2133 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2134 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2135 PPGMRAMRANGE pRam = CTXSUFF(pVM->pgm.s.pRamRanges);
2136 unsigned iPTDst = 0;
2137 while (iPTDst < ELEMENTS(pPTDst->a))
2138 {
2139 /* Advance ram range list. */
2140 while (pRam && GCPhys > pRam->GCPhysLast)
2141 pRam = CTXSUFF(pRam->pNext);
2142 if (pRam && GCPhys >= pRam->GCPhys)
2143 {
2144 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2145 do
2146 {
2147 /* Make shadow PTE. */
2148 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2149 SHWPTE PteDst;
2150
2151 /* Make sure the RAM has already been allocated. */
2152 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2153 {
2154 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2155 {
2156# ifdef IN_RING3
2157 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2158# else
2159 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2160# endif
2161 if (rc != VINF_SUCCESS)
2162 return rc;
2163 }
2164 }
2165
2166 if (pPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)) /** @todo PAGE FLAGS */
2167 {
2168 if (!(pPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL))) /** @todo PAGE FLAGS */
2169 {
2170 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2171 PteDst.n.u1Write = 0;
2172 }
2173 else
2174 PteDst.u = 0;
2175 }
2176# ifndef IN_RING0
2177 /*
2178 * Assuming kernel code will be marked as supervisor and not as user level and executed
2179 * using a conforming code selector. Don't check for read-only, as that implies the whole
2180 * 4MB can be code or read-only data. Linux enables write access for its large pages.
2181 */
2182 else if ( !PdeSrc.n.u1User
2183 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2184 PteDst.u = 0;
2185# endif
2186 else
2187 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2188# ifdef PGMPOOL_WITH_USER_TRACKING
2189 if (PteDst.n.u1Present)
2190 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2191# endif
2192 /* commit it */
2193 pPTDst->a[iPTDst] = PteDst;
2194 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2195 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2196 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2197
2198 /* advance */
2199 GCPhys += PAGE_SIZE;
2200 iHCPage++;
2201 iPTDst++;
2202 } while ( iPTDst < ELEMENTS(pPTDst->a)
2203 && GCPhys <= pRam->GCPhysLast);
2204 }
2205 else if (pRam)
2206 {
2207 Log(("Invalid pages at %VGp\n", GCPhys));
2208 do
2209 {
2210 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2211 GCPhys += PAGE_SIZE;
2212 iPTDst++;
2213 } while ( iPTDst < ELEMENTS(pPTDst->a)
2214 && GCPhys < pRam->GCPhys);
2215 }
2216 else
2217 {
2218 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2219 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2220 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2221 }
2222 } /* while more PTEs */
2223 } /* 4KB / 4MB */
2224 }
2225 else
2226 AssertRelease(!PdeDst.n.u1Present);
2227
2228 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2229# ifdef IN_GC
2230 if (VBOX_FAILURE(rc))
2231 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2232# endif
2233 return rc;
2234
2235#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
2236
2237 int rc = VINF_SUCCESS;
2238
2239 /*
2240 * Validate input a little bit.
2241 */
2242# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2243 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2244# else
2245 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2246# endif
2247 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2248 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2249 SHWPDE PdeDst = *pPdeDst;
2250
2251 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2252 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2253
2254 VBOXPDE PdeSrc;
2255 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2256 PdeSrc.n.u1Present = 1;
2257 PdeSrc.n.u1Write = 1;
2258 PdeSrc.n.u1Accessed = 1;
2259 PdeSrc.n.u1User = 1;
2260
2261 /*
2262 * Allocate & map the page table.
2263 */
2264 PSHWPT pPTDst;
2265 PPGMPOOLPAGE pShwPage;
2266 RTGCPHYS GCPhys;
2267
2268 /* Virtual address = physical address */
2269 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK_32;
2270 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2271
2272 if ( rc == VINF_SUCCESS
2273 || rc == VINF_PGM_CACHED_PAGE)
2274 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2275 else
2276 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2277
2278 PdeDst.u &= X86_PDE_AVL_MASK;
2279 PdeDst.u |= pShwPage->Core.Key;
2280 PdeDst.n.u1Present = 1;
2281 *pPdeDst = PdeDst;
2282
2283 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2284 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2285 return rc;
2286
2287#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2288
2289 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2290 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2291 return VERR_INTERNAL_ERROR;
2292#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2293}
2294
2295
2296
2297/**
2298 * Prefetch a page/set of pages.
2299 *
2300 * Typically used to sync commonly used pages before entering raw mode
2301 * after a CR3 reload.
2302 *
2303 * @returns VBox status code.
2304 * @param pVM VM handle.
2305 * @param GCPtrPage Page to prefetch.
2306 */
2307PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2308{
2309#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2310
2311# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2312# error "Invalid shadow mode for 32-bit guest mode!"
2313# endif
2314
2315 /*
2316 * Check that all Guest levels thru the PDE are present, getting the
2317 * PD and PDE in the process.
2318 */
2319 int rc = VINF_SUCCESS;
2320# if PGM_WITH_PAGING(PGM_GST_TYPE)
2321 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2322 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2323# else
2324 PVBOXPD pPDSrc = NULL;
2325 const unsigned iPDSrc = 0;
2326# endif
2327
2328# if PGM_WITH_PAGING(PGM_GST_TYPE)
2329 const VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
2330# else
2331 VBOXPDE PdeSrc;
2332 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2333 PdeSrc.n.u1Present = 1;
2334 PdeSrc.n.u1Write = 1;
2335 PdeSrc.n.u1Accessed = 1;
2336 PdeSrc.n.u1User = 1;
2337# endif
2338
2339# ifdef PGM_SYNC_ACCESSED_BIT
2340 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2341# else
2342 if (PdeSrc.n.u1Present)
2343# endif
2344 {
2345# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2346 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> X86_PD_SHIFT];
2347# else
2348 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> X86_PD_PAE_SHIFT];
2349# endif
2350 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2351 {
2352 if (!PdeDst.n.u1Present)
2353 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2354 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2355 else
2356 {
2357 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2358 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2359 * makes no sense to prefetch more than one page.
2360 */
2361 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2362 if (VBOX_SUCCESS(rc))
2363 rc = VINF_SUCCESS;
2364 }
2365 }
2366 }
2367 return rc;
2368
2369#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2370
2371 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2372 return VERR_INTERNAL_ERROR;
2373#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2374}
2375
2376
2377
2378
2379/**
2380 * Syncs a page during a PGMVerifyAccess() call.
2381 *
2382 * @returns VBox status code (informational included).
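 * @param   pVM         The VM handle.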
2383 * @param GCPtrPage The address of the page to sync.
2384 * @param fPage The effective guest page flags.
2385 * @param uErr The trap error code.
2386 */
2387PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2388{
2389 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2390
2391#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2392
2393# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2394# error "Invalid shadow mode for 32-bit guest mode!"
2395# endif
2396
2397#ifndef IN_RING0
2398 if (!(fPage & X86_PTE_US))
2399 {
2400 /*
2401 * Mark this page as safe.
2402 */
2403 /** @todo not correct for pages that contain both code and data!! */
2404 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2405 CSAMMarkPage(pVM, (RTGCPTR)GCPtrPage, true);
2406 }
2407#endif
2408 /*
2409 * Get guest PD and index.
2410 */
2411 unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
2412 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2413 int rc = VINF_SUCCESS;
2414
2415 /*
2416 * First check if the shadow pd is present.
2417 */
2418# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2419 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> X86_PD_SHIFT];
2420# else
2421 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> X86_PD_PAE_SHIFT];
2422# endif
2423 if (!pPdeDst->n.u1Present)
2424 {
2425 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2426 AssertRC(rc);
2427 if (rc != VINF_SUCCESS)
2428 return rc;
2429 }
2430
2431# if PGM_WITH_PAGING(PGM_GST_TYPE)
2432 /* Check for dirty bit fault */
2433 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2434 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2435 Log(("PGMVerifyAccess: success (dirty)\n"));
2436 else
2437 {
2438 VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
2439#else
2440 {
2441 VBOXPDE PdeSrc;
2442 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2443 PdeSrc.n.u1Present = 1;
2444 PdeSrc.n.u1Write = 1;
2445 PdeSrc.n.u1Accessed = 1;
2446 PdeSrc.n.u1User = 1;
2447
2448#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2449 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2450 if (uErr & X86_TRAP_PF_US)
2451 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2452 else /* supervisor */
2453 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2454
2455 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2456 if (VBOX_SUCCESS(rc))
2457 {
2458 /* Page was successfully synced */
2459 Log(("PGMVerifyAccess: success (sync)\n"));
2460 rc = VINF_SUCCESS;
2461 }
2462 else
2463 {
2464 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2465 return VINF_EM_RAW_GUEST_TRAP;
2466 }
2467 }
2468 return rc;
2469
2470#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2471
2472 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2473 return VERR_INTERNAL_ERROR;
2474#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2475}
2476
2477
2478#if PGM_GST_TYPE == PGM_TYPE_32BIT
2479# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2480/**
2481 * Figures out which kind of shadow page this guest PDE warrants.
2482 *
2483 * @returns Shadow page kind.
2484 * @param pPdeSrc The guest PDE in question.
2485 * @param cr4 The current guest cr4 value.
2486 */
2487DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const VBOXPDE *pPdeSrc, uint32_t cr4)
2488{
2489 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2490 return BTH_PGMPOOLKIND_PT_FOR_PT;
2491 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2492 //{
2493 // case 0:
2494 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2495 // case X86_PDE4M_RW:
2496 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2497 // case X86_PDE4M_US:
2498 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2499 // case X86_PDE4M_RW | X86_PDE4M_US:
2500 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2501# if 0
2502 // case X86_PDE4M_PAE_NX:
2503 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2504 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2505 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2506 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2507 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2508 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2509 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2510# endif
2511 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2512 //}
2513}
2514# endif
2515#endif
2516
2517#undef MY_STAM_COUNTER_INC
2518#define MY_STAM_COUNTER_INC(a) do { } while (0)
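/* Editorial note (not in the original source): the override above intentionally turns the
 * MY_STAM_COUNTER_INC statistics used by SyncCR3 below into no-ops; drop the #define to
 * re-enable them. */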
2519
2520
2521/**
2522 * Syncs the paging hierarchy starting at CR3.
2523 *
2524 * @returns VBox status code, no specials.
2525 * @param pVM The virtual machine.
2526 * @param cr0 Guest context CR0 register
2527 * @param cr3 Guest context CR3 register
2528 * @param cr4 Guest context CR4 register
2529 * @param fGlobal Including global page directories or not
2530 */
2531PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint32_t cr0, uint32_t cr3, uint32_t cr4, bool fGlobal)
2532{
2533#if PGM_GST_TYPE == PGM_TYPE_32BIT
2534# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2535 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
2536 fGlobal = true; /* Change this CR3 reload to be a global one. */
2537# endif
2538#endif
2539
2540 /*
2541 * Update page access handlers.
2542 * The virtual handlers are always flushed, while the physical handlers are only flushed on demand.
2543 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
2544 * have to look into that later because it will have a bad influence on performance.
2545 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
2546 * bird: Yes, but that won't work for aliases.
2547 */
2548 /** @todo this MUST go away. See #1557. */
2549 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2550 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
2551 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2552
2553#ifdef PGMPOOL_WITH_MONITORING
2554 /*
2555 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2556 * Occasionally we will have to clear all the shadow page tables because we wanted
2557 * to monitor a page which was mapped by too many shadowed page tables. This operation
2558 * is sometimes referred to as a 'lightweight flush'.
2559 */
2560 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2561 pgmPoolMonitorModifiedClearAll(pVM);
2562 else
2563 {
2564# ifdef IN_RING3
2565 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2566 pgmPoolClearAll(pVM);
2567# else
2568 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2569 return VINF_PGM_SYNC_CR3;
2570# endif
2571 }
2572#endif
2573
2574 Assert(fGlobal || (cr4 & X86_CR4_PGE));
2575 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
2576
2577#if PGM_GST_TYPE == PGM_TYPE_32BIT
2578# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2579 /*
2580 * Get page directory addresses.
2581 */
2582# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2583 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
2584# else
2585 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
2586# endif
2587 PVBOXPD pPDSrc = pVM->pgm.s.CTXSUFF(pGuestPD);
2588
2589 Assert(pPDSrc);
2590#ifndef IN_GC
2591 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & X86_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
2592#endif
2593
2594 /*
2595 * Iterate the page directory.
2596 */
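 /* Editorial overview (not in the original source), roughly how the loop below treats each
  * guest PDE: entries that are present and usable (user, or supervisor when raw ring-0 is
  * enabled) have their shadow page table checked and kept or freed; other non-mapping
  * entries get the shadow PDE cleared; entries covered by GC mappings are skipped or have
  * their conflicts resolved. */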
2597 PPGMMAPPING pMapping;
2598 unsigned iPdNoMapping;
2599 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
2600 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2601
2602 /* Only check mappings if they are supposed to be put into the shadow page table. */
2603 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
2604 {
2605 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2606 iPdNoMapping = (pMapping) ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2607 }
2608 else
2609 {
2610 pMapping = 0;
2611 iPdNoMapping = ~0U;
2612 }
2613
2614 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2615 {
2616# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2617 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
2618# else
2619 Assert(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst);
2620# endif
2621 register VBOXPDE PdeSrc = pPDSrc->a[iPD];
2622 if ( PdeSrc.n.u1Present
2623 && (PdeSrc.n.u1User || fRawR0Enabled))
2624 {
2625 /*
2626 * Check for conflicts with GC mappings.
2627 */
2628 if (iPD == iPdNoMapping)
2629 {
2630 if (pVM->pgm.s.fMappingsFixed)
2631 {
2632 /* It's fixed, just skip the mapping. */
2633 const unsigned cPTs = pMapping->cPTs;
2634 iPD += cPTs - 1;
2635 pPDEDst += cPTs + (PGM_SHW_TYPE != PGM_TYPE_32BIT) * cPTs;
2636 pMapping = pMapping->CTXALLSUFF(pNext);
2637 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2638 continue;
2639 }
2640
2641#ifdef IN_RING3
2642 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD);
2643 if (VBOX_FAILURE(rc))
2644 return rc;
2645
2646 /*
2647 * Update iPdNoMapping and pMapping.
2648 */
2649 pMapping = pVM->pgm.s.pMappingsR3;
2650 while (pMapping && pMapping->GCPtr < (iPD << PGDIR_SHIFT))
2651 pMapping = pMapping->pNextR3;
2652 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2653#else
2654 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2655 return VINF_PGM_SYNC_CR3;
2656#endif
2657 }
2658
2659 /*
2660 * Sync page directory entry.
2661 *
2662 * The current approach is to allocate the page table but to set
2663 * the entry to not-present and postpone the page table syncing till
2664 * it's actually used.
2665 */
2666# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2667 const unsigned iPdShw = iPD; NOREF(iPdShw);
2668# else
2669 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2670# endif
2671 {
2672 SHWPDE PdeDst = *pPDEDst;
2673 if (PdeDst.n.u1Present)
2674 {
2675 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2676 RTGCPHYS GCPhys;
2677 if ( !PdeSrc.b.u1Size
2678 || !(cr4 & X86_CR4_PSE))
2679 {
2680 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2681# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2682 GCPhys |= i * (PAGE_SIZE / 2);
2683# endif
2684 }
2685 else
2686 {
2687 GCPhys = PdeSrc.u & GST_PDE4M_PG_MASK;
2688# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2689 GCPhys |= i * X86_PAGE_2M_SIZE;
2690# endif
2691 }
2692
2693 if ( pShwPage->GCPhys == GCPhys
2694 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
2695 && ( pShwPage->fCached
2696 || ( !fGlobal
2697 && ( false
2698# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
2699 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2700 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
2701 || ( !pShwPage->fSeenNonGlobal
2702 && (cr4 & X86_CR4_PGE))
2703# endif
2704 )
2705 )
2706 )
2707 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
2708 || ( (cr4 & X86_CR4_PSE)
2709 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
2710 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
2711 )
2712 )
2713 {
2714# ifdef VBOX_WITH_STATISTICS
2715 if ( !fGlobal
2716 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2717 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
2718 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
2719 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
2720 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
2721 else
2722 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
2723# endif /* VBOX_WITH_STATISTICS */
2724/** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
2725 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
2726//# ifdef PGMPOOL_WITH_CACHE
2727// pgmPoolCacheUsed(pPool, pShwPage);
2728//# endif
2729 }
2730 else
2731 {
2732 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
2733 pPDEDst->u = 0;
2734 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
2735 }
2736 }
2737 else
2738 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
2739 pPDEDst++;
2740 }
2741 }
2742 else if (iPD != iPdNoMapping)
2743 {
2744 /*
2745 * Check if there is any page directory to mark not present here.
2746 */
2747# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2748 const unsigned iPdShw = iPD; NOREF(iPdShw);
2749# else
2750 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2751# endif
2752 {
2753 if (pPDEDst->n.u1Present)
2754 {
2755 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
2756 pPDEDst->u = 0;
2757 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
2758 }
2759 pPDEDst++;
2760 }
2761 }
2762 else
2763 {
2764 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2765 const unsigned cPTs = pMapping->cPTs;
2766 if (pVM->pgm.s.fMappingsFixed)
2767 {
2768 /* It's fixed, just skip the mapping. */
2769 pMapping = pMapping->CTXALLSUFF(pNext);
2770 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2771 }
2772 else
2773 {
2774 /*
2775 * Check for conflicts for subsequent pagetables
2776 * and advance to the next mapping.
2777 */
2778 iPdNoMapping = ~0U;
2779 unsigned iPT = cPTs;
2780 while (iPT-- > 1)
2781 {
2782 if ( pPDSrc->a[iPD + iPT].n.u1Present
2783 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
2784 {
2785# ifdef IN_RING3
2786 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD);
2787 if (VBOX_FAILURE(rc))
2788 return rc;
2789
2790 /*
2791 * Update iPdNoMapping and pMapping.
2792 */
2793 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2794 while (pMapping && pMapping->GCPtr < (iPD << PGDIR_SHIFT))
2795 pMapping = pMapping->CTXALLSUFF(pNext);
2796 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2797 break;
2798# else
2799 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2800 return VINF_PGM_SYNC_CR3;
2801# endif
2802 }
2803 }
2804 if (iPdNoMapping == ~0U && pMapping)
2805 {
2806 pMapping = pMapping->CTXALLSUFF(pNext);
2807 if (pMapping)
2808 iPdNoMapping = pMapping->GCPtr >> PGDIR_SHIFT;
2809 }
2810 }
2811 /* advance. */
2812 iPD += cPTs - 1;
2813 pPDEDst += cPTs + (PGM_SHW_TYPE != PGM_TYPE_32BIT) * cPTs;
2814 }
2815
2816 } /* for iPD */
2817# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2818# error "Guest 32-bit mode and shadow AMD64 mode doesn't add up!"
2819# endif
2820
2821 return VINF_SUCCESS;
2822
2823#elif PGM_GST_TYPE == PGM_TYPE_PAE
2824# if PGM_SHW_TYPE == PGM_TYPE_PAE
2825//# error not implemented
2826 return VERR_INTERNAL_ERROR;
2827
2828# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2829# error "Guest PAE mode, but not the shadow mode ; 32bit - maybe, but amd64 no."
2830# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2831
2832#elif PGM_GST_TYPE == PGM_TYPE_AMD64
2833# if PGM_SHW_TYPE == PGM_TYPE_AMD64
2834//# error not implemented
2835 return VERR_INTERNAL_ERROR;
2836
2837# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2838# error "Guest AMD64 mode, but not the shadow mode - that can't be right!"
2839# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2840
2841#else /* guest real and protected mode */
2842
2843 return VINF_SUCCESS;
2844#endif
2845}
2846
2847
2848
2849
2850#ifdef VBOX_STRICT
2851#ifdef IN_GC
2852# undef AssertMsgFailed
2853# define AssertMsgFailed Log
2854#endif
2855#ifdef IN_RING3
2856# include <VBox/dbgf.h>
2857
2858/**
2859 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
2860 *
2861 * @returns VBox status code (VINF_SUCCESS).
2862 * @param pVM The VM handle.
2863 * @param cr3 The root of the hierarchy.
2864 * @param cr4 The cr4, only PAE and PSE are currently used.
2865 * @param fLongMode Set if long mode, false if not long mode.
2866 * @param cMaxDepth Number of levels to dump.
2867 * @param pHlp Pointer to the output functions.
2868 */
2869__BEGIN_DECLS
2870PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
2871__END_DECLS
2872
2873#endif
2874
2875/**
2876 * Checks that the shadow page table is in sync with the guest one.
2877 *
2878 * @returns The number of errors.
2879 * @param pVM The virtual machine.
2880 * @param cr3 Guest context CR3 register
2881 * @param cr4 Guest context CR4 register
2882 * @param GCPtr Where to start. Defaults to 0.
2883 * @param cb How much to check. Defaults to everything.
2884 */
2885PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint32_t cr3, uint32_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
2886{
2887 unsigned cErrors = 0;
2888
2889#if PGM_GST_TYPE == PGM_TYPE_32BIT
2890
2891# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2892# error "Invalid shadow mode for 32-bit guest paging."
2893# endif
2894
2895 PPGM pPGM = &pVM->pgm.s;
2896 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
2897 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
2898 RTHCPHYS HCPhys; /* general usage. */
2899 int rc;
2900
2901 /*
2902 * Check that the Guest CR3 and all its mappings are correct.
2903 */
2904 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & X86_CR3_PAGE_MASK),
2905 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
2906 false);
2907 rc = PGMShwGetPage(pVM, pPGM->pGuestPDGC, NULL, &HCPhysShw);
2908 AssertRCReturn(rc, 1);
2909 HCPhys = NIL_RTHCPHYS;
2910 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & X86_CR3_PAGE_MASK, &HCPhys);
2911 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhyswShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
2912# ifdef IN_RING3
2913 RTGCPHYS GCPhys;
2914 rc = PGMR3DbgHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
2915 AssertRCReturn(rc, 1);
2916 AssertMsgReturn((cr3 & X86_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
2917# endif
2918 const X86PD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
2919
2920 /*
2921 * Get and check the Shadow CR3.
2922 */
2923# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2924 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
2925 unsigned cPDEs = ELEMENTS(pPDDst->a);
2926# else
2927 const X86PDPAE *pPDDst = pPGM->CTXMID(ap,PaePDs[0]); /* use it as a 2048 entry PD */
2928 unsigned cPDEs = ELEMENTS(pPDDst->a) * ELEMENTS(pPGM->apHCPaePDs);
2929# endif
2930 if (cb != ~(RTGCUINTPTR)0)
2931 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
2932
2933/** @todo call the other two PGMAssert*() functions. */
2934
2935 /*
2936 * Iterate the shadow page directory.
2937 */
2938 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
2939 unsigned iPDDst = GCPtr >> SHW_PD_SHIFT;
2940 cPDEs += iPDDst;
2941 for (;
2942 iPDDst < cPDEs;
2943 iPDDst++, GCPtr += _4G / cPDEs)
2944 {
2945 const SHWPDE PdeDst = pPDDst->a[iPDDst];
2946 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2947 {
2948 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2949 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
2950 {
2951 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
2952 cErrors++;
2953 continue;
2954 }
2955 }
2956 else if ( (PdeDst.u & X86_PDE_P)
2957 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2958 )
2959 {
2960 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
2961 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
2962 if (!pPoolPage)
2963 {
2964 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
2965 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
2966 cErrors++;
2967 continue;
2968 }
2969 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
2970
2971 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
2972 {
2973 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
2974 GCPtr, (uint64_t)PdeDst.u));
2975 cErrors++;
2976 }
2977
2978 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
2979 {
2980 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
2981 GCPtr, (uint64_t)PdeDst.u));
2982 cErrors++;
2983 }
2984
2985 const X86PDE PdeSrc = pPDSrc->a[iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)];
2986 if (!PdeSrc.n.u1Present)
2987 {
2988 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
2989 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
2990 cErrors++;
2991 continue;
2992 }
2993
2994 if ( !PdeSrc.b.u1Size
2995 || !(cr4 & X86_CR4_PSE))
2996 {
2997 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
2998# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2999 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3000# endif
3001 }
3002 else
3003 {
3004 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3005 {
3006 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3007 GCPtr, (uint64_t)PdeSrc.u));
3008 cErrors++;
3009 continue;
3010 }
3011 GCPhysGst = PdeSrc.u & GST_PDE4M_PG_MASK;
3012# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3013 GCPhysGst |= GCPtr & BIT(X86_PAGE_2M_SHIFT);
3014# endif
3015 }
3016
3017 if ( pPoolPage->enmKind
3018 != (!PdeSrc.b.u1Size || !(cr4 & X86_CR4_PSE) ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3019 {
3020 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3021 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3022 cErrors++;
3023 }
3024
3025 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3026 if (!pPhysPage)
3027 {
3028 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3029 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3030 cErrors++;
3031 continue;
3032 }
3033
3034 if (GCPhysGst != pPoolPage->GCPhys)
3035 {
3036 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3037 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3038 cErrors++;
3039 continue;
3040 }
3041
3042 if ( !PdeSrc.b.u1Size
3043 || !(cr4 & X86_CR4_PSE))
3044 {
3045 /*
3046 * Page Table.
3047 */
3048 const GSTPT *pPTSrc;
3049 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3050 if (VBOX_FAILURE(rc))
3051 {
3052 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3053 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3054 cErrors++;
3055 continue;
3056 }
3057 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3058 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3059 {
3060 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3061 // (This problem will go away when/if we shadow multiple CR3s.)
3062 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3063 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3064 cErrors++;
3065 continue;
3066 }
3067 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3068 {
3069 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3070 GCPtr, (uint64_t)PdeDst.u));
3071 cErrors++;
3072 continue;
3073 }
3074
3075 /* iterate the page table. */
3076# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3077 const unsigned offPTSrc = 0;
3078# else
3079 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3080# endif
3081 for (unsigned iPT = 0, off = 0;
3082 iPT < ELEMENTS(pPTDst->a);
3083 iPT++, off += PAGE_SIZE)
3084 {
3085 const SHWPTE PteDst = pPTDst->a[iPT];
3086
3087 /* skip not-present entries. */
3088 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3089 continue;
3090 Assert(PteDst.n.u1Present);
3091
3092 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3093 if (!PteSrc.n.u1Present)
3094 {
3095#ifdef IN_RING3
3096 PGMAssertHandlerAndFlagsInSync(pVM);
3097 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3098#endif
3099 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3100 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3101 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3102 cErrors++;
3103 continue;
3104 }
3105
3106 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3107# if 1 /** @todo sync accessed bit properly... */
3108 fIgnoreFlags |= X86_PTE_A;
3109# endif
3110
3111 /* match the physical addresses */
3112 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3113 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3114
3115# ifdef IN_RING3
3116 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3117 if (VBOX_FAILURE(rc))
3118 {
3119 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3120 {
3121 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3122 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3123 cErrors++;
3124 continue;
3125 }
3126 }
3127 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3128 {
3129 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3130 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3131 cErrors++;
3132 continue;
3133 }
3134# endif
3135
3136 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3137 if (!pPhysPage)
3138 {
3139# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3140 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3141 {
3142 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3143 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3144 cErrors++;
3145 continue;
3146 }
3147# endif
3148 if (PteDst.n.u1Write)
3149 {
3150 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3151 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3152 cErrors++;
3153 }
3154 fIgnoreFlags |= X86_PTE_RW;
3155 }
3156 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
3157 {
3158 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3159 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3160 cErrors++;
3161 continue;
3162 }
3163
3164 /* flags */
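                    /* Pages with access handlers: a write handler requires the shadow PTE to be
                       read-only, an all-access handler requires it to be not present; the bit in
                       question is then excluded from the flags comparison below. */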
3165 if (pPhysPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)) /** @todo PAGE FLAGS */
3166 {
3167 if (pPhysPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)) /** @todo PAGE FLAGS */
3168 {
3169 if (PteDst.n.u1Write)
3170 {
3171                            AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3172 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3173 cErrors++;
3174 continue;
3175 }
3176 fIgnoreFlags |= X86_PTE_RW;
3177 }
3178 else
3179 {
3180 if (PteDst.n.u1Present)
3181 {
3182 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3183 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3184 cErrors++;
3185 continue;
3186 }
3187 fIgnoreFlags |= X86_PTE_P;
3188 }
3189 }
3190 else
3191 {
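                        /* No handler flags, so check dirty bit tracking: a guest page which is
                           writable but not yet dirty must be shadowed read-only and marked
                           PGM_PTFLAGS_TRACK_DIRTY so the first write traps and sets the dirty bit. */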
3192 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3193 {
3194 if (PteDst.n.u1Write)
3195 {
3196 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3197 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3198 cErrors++;
3199 continue;
3200 }
3201 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3202 {
3203 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3204 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3205 cErrors++;
3206 continue;
3207 }
3208 if (PteDst.n.u1Dirty)
3209 {
3210 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3211 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3212 cErrors++;
3213 }
3214# if 0 /** @todo sync access bit properly... */
3215 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3216 {
3217                            AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3218 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3219 cErrors++;
3220 }
3221 fIgnoreFlags |= X86_PTE_RW;
3222# else
3223 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3224# endif
3225 }
3226 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3227 {
3228 /* access bit emulation (not implemented). */
3229 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3230 {
3231 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3232 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3233 cErrors++;
3234 continue;
3235 }
3236 if (!PteDst.n.u1Accessed)
3237 {
3238                            AssertMsgFailed(("!ACCESSED page at %VGv doesn't have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3239 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3240 cErrors++;
3241 }
3242 fIgnoreFlags |= X86_PTE_P;
3243 }
3244# ifdef DEBUG_sandervl
3245 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3246# endif
3247 }
3248
3249 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3250 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3251 )
3252 {
3253 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3254 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3255 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3256 cErrors++;
3257 continue;
3258 }
3259 } /* foreach PTE */
3260 }
3261 else
3262 {
3263 /*
3264 * Big Page.
3265 */
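            /* A guest 2/4M page is shadowed by a full 4KB page table, so the PDE level flags
               are checked first and each shadow PTE is then compared against the guest PDE
               while GCPhysGst advances in 4KB steps. */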
3266 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | X86_PDE_PAE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3267 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3268 {
3269 if (PdeDst.n.u1Write)
3270 {
3271 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3272 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3273 cErrors++;
3274 continue;
3275 }
3276 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3277 {
3278                AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3279 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3280 cErrors++;
3281 continue;
3282 }
3283# if 0 /** @todo sync access bit properly... */
3284 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3285 {
3286                AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3287 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3288 cErrors++;
3289 }
3290 fIgnoreFlags |= X86_PTE_RW;
3291# else
3292 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3293# endif
3294 }
3295 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3296 {
3297 /* access bit emulation (not implemented). */
3298 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3299 {
3300 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3301 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3302 cErrors++;
3303 continue;
3304 }
3305 if (!PdeDst.n.u1Accessed)
3306 {
3307                AssertMsgFailed(("!ACCESSED page at %VGv doesn't have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3308 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3309 cErrors++;
3310 }
3311 fIgnoreFlags |= X86_PTE_P;
3312 }
3313
3314 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3315 {
3316 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3317 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3318 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3319 cErrors++;
3320 }
3321
3322 /* iterate the page table. */
3323 for (unsigned iPT = 0, off = 0;
3324 iPT < ELEMENTS(pPTDst->a);
3325 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3326 {
3327 const SHWPTE PteDst = pPTDst->a[iPT];
3328
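                /* Dirty tracking for big pages is done at the PDE level, so no shadow PTE
                   backing one may carry PGM_PTFLAGS_TRACK_DIRTY. */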
3329 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3330 {
3331 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3332 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3333 cErrors++;
3334 }
3335
3336 /* skip not-present entries. */
3337 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3338 continue;
3339
3340 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3341
3342 /* match the physical addresses */
3343 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3344
3345# ifdef IN_RING3
3346 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3347 if (VBOX_FAILURE(rc))
3348 {
3349 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3350 {
3351 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3352 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3353 cErrors++;
3354 }
3355 }
3356 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3357 {
3358 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3359 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3360 cErrors++;
3361 continue;
3362 }
3363# endif
3364
3365 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3366 if (!pPhysPage)
3367 {
3368# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3369 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3370 {
3371 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3372 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3373 cErrors++;
3374 continue;
3375 }
3376# endif
3377 if (PteDst.n.u1Write)
3378 {
3379 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3380 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3381 cErrors++;
3382 }
3383 fIgnoreFlags |= X86_PTE_RW;
3384 }
3385 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
3386 {
3387 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3388 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3389 cErrors++;
3390 continue;
3391 }
3392
3393 /* flags */
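                /* Same access handler checks as for 4K pages, except that a temporarily
                   disabled write handler (MM_RAM_FLAGS_PHYSICAL_TEMP_OFF) may leave the
                   shadow PTE writable. */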
3394 if (pPhysPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)) /** @todo PAGE FLAGS */
3395 {
3396 if (pPhysPage->HCPhys & (MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)) /** @todo PAGE FLAGS */
3397 {
3398 if (!(pPhysPage->HCPhys & MM_RAM_FLAGS_PHYSICAL_TEMP_OFF)) /** @todo PAGE FLAGS */
3399 {
3400 if (PteDst.n.u1Write)
3401 {
3402                            AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3403 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3404 cErrors++;
3405 continue;
3406 }
3407 fIgnoreFlags |= X86_PTE_RW;
3408 }
3409 }
3410 else
3411 {
3412 if (PteDst.n.u1Present)
3413 {
3414                        AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3415 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3416 cErrors++;
3417 continue;
3418 }
3419 fIgnoreFlags |= X86_PTE_P;
3420 }
3421 }
3422
3423 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3424 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
3425 )
3426 {
3427 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
3428 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3429 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3430 cErrors++;
3431 continue;
3432 }
3433 } /* foreach PTE */
3434 }
3435 }
3436 /* not present */
3437
3438    } /* foreach PDE */
3439
3440# ifdef DEBUG
3441 if (cErrors)
3442 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
3443# endif
3444
3445#elif PGM_GST_TYPE == PGM_TYPE_PAE
3446//# error not implemented
3447
3448
3449#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3450//# error not implemented
3451
3452/*#else: guest real and protected mode */
3453#endif
3454 return cErrors;
3455}
3456#endif /* VBOX_STRICT */
3457