VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 2228

Last change on this file since 2228 was 2203, checked in by vboxsync on 2007-04-19

prevent warning

1/* $Id: PGMAllBth.h 2203 2007-04-19 09:02:14Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006 InnoTek Systemberatung GmbH
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License as published by the Free Software Foundation,
15 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
16 * distribution. VirtualBox OSE is distributed in the hope that it will
17 * be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * If you received this file as part of a commercial VirtualBox
20 * distribution, then only the terms of your commercial VirtualBox
21 * license agreement apply instead of the previous paragraph.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, VBOXPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PVBOXPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PVBOXPD pPDSrc, RTGCUINTPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint32_t cr0, uint32_t cr3, uint32_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint32_t cr3, uint32_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42__END_DECLS
43
44
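/*
 * Note on structure: this header is a paging-mode template.  PGM_GST_TYPE and
 * PGM_SHW_TYPE select the guest and shadow paging modes the code is built for,
 * and PGM_BTH_DECL()/PGM_BTH_NAME() mangle the function names per combination,
 * which is why every function body below is bracketed by mode #if/#endif blocks
 * and the header is evidently meant to be instantiated once per supported pair.
 */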
45/**
46 * #PF Handler for raw-mode guest execution.
47 *
48 * @returns VBox status code (appropriate for trap handling and GC return).
49 * @param pVM VM Handle.
50 * @param uErr The trap error code.
51 * @param pRegFrame Trap register frame.
52 * @param pvFault The fault address.
53 */
54PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
55{
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) && PGM_SHW_TYPE != PGM_TYPE_AMD64
57
58# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
59# error "32-bit guest mode is only implemented for 32-bit and PAE shadow modes."
60# endif
61
62# if PGM_SHW_TYPE == PGM_TYPE_PAE
63 /*
64 * Hide the instruction fetch trap indicator for now.
65 */
66 /** @todo NXE will change this and we must fix NXE in the switcher too! */
67 if (uErr & X86_TRAP_PF_ID)
68 {
69 uErr &= ~X86_TRAP_PF_ID;
70 TRPMSetErrorCode(pVM, uErr);
71 }
72# endif
73
74 /*
75 * Get PDs.
76 */
77 int rc;
78# if PGM_WITH_PAGING(PGM_GST_TYPE)
79 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
80 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
81# else
82 PVBOXPD pPDSrc = NULL;
83 const unsigned iPDSrc = 0;
84# endif
85
86 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
87# if PGM_SHW_TYPE == PGM_TYPE_32BIT
88 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
89# else /* PAE */
90 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries. */
91# endif
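 /* Illustration of the indexing above for a 32-bit guest: the guest PD index
  * is bits 31:22 of the fault address (GST_PD_SHIFT == 22).  The shadow index
  * depends on the shadow mode: the 32-bit shadow uses the same 10-bit index,
  * while the PAE shadow treats its four 512-entry PDs as one 2048-entry array,
  * so SHW_PD_SHIFT works out to 21 there.  E.g. pvFault == 0xc0101234 gives
  * iPDSrc == 0x300. */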
92
93# if PGM_WITH_PAGING(PGM_GST_TYPE)
94 /* Determine current privilege level */
95 uint32_t cpl = CPUMGetGuestCPL(pVM, pRegFrame);
96
97# ifdef PGM_SYNC_DIRTY_BIT
98 /*
99 * If we successfully correct the write protection fault due to dirty bit
100 * tracking, or this page fault is a genuine one, then return immediately.
101 */
102 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
103 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
104 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
105 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
106 || rc == VINF_EM_RAW_GUEST_TRAP)
107 {
108 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
109 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
110 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
111 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
112 }
113# endif
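 /* Context for the block above: with dirty-bit tracking the shadow entries for
  * pages the guest considers clean are kept read-only and tagged with
  * PGM_PDFLAGS_TRACK_DIRTY / PGM_PTFLAGS_TRACK_DIRTY (see SyncPage/SyncPageWorker
  * below).  A write to such a page faults here, CheckPageFault gets a chance to
  * correct the write protection and set the guest accessed/dirty bits, and the
  * instruction is simply restarted -- hence the immediate VINF_SUCCESS return. */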
114
115 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
116# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
117
118 /*
119 * A common case is the not-present error caused by lazy page table syncing.
120 *
121 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
122 * so we can safely assume that the shadow PT is present when calling SyncPage later.
123 *
124 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
125 * of mapping conflict and defer to SyncCR3 in R3.
126 * (Again, we do NOT support access handlers for non-present guest pages.)
127 *
128 */
129# if PGM_WITH_PAGING(PGM_GST_TYPE)
130 VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
131# else
132 VBOXPDE PdeSrc;
133 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
134 PdeSrc.n.u1Present = 1;
135 PdeSrc.n.u1Write = 1;
136 PdeSrc.n.u1Accessed = 1;
137 PdeSrc.n.u1User = 1;
138# endif
139 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
140 && !pPDDst->a[iPDDst].n.u1Present
141 && PdeSrc.n.u1Present
142 )
143
144 {
145 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
146 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
147 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
148 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
149 if (VBOX_SUCCESS(rc))
150 {
151 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
152 return rc;
153 }
154 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
155 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
156 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
157 return VINF_PGM_SYNC_CR3;
158 }
159
160# if PGM_WITH_PAGING(PGM_GST_TYPE)
161 /*
162 * Check if this address is within any of our mappings.
163 *
164 * This is *very* fast and it's gonna save us a bit of effort below and prevent
165 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
166 * (BTW, it's impossible to have physical access handlers in a mapping.)
167 */
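 /* The checks below use the usual unsigned-wraparound idiom for range tests:
  * (uintptr)pvFault - (uintptr)GCPtr < cb holds exactly when pvFault lies in
  * [GCPtr, GCPtr + cb), since an address below GCPtr wraps to a huge value.
  * The same idiom recurs in the handler range checks further down. */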
168 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
169 {
170 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
171 PPGMMAPPING pMapping = CTXSUFF(pVM->pgm.s.pMappings);
172 for ( ; pMapping; pMapping = CTXSUFF(pMapping->pNext))
173 {
174 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
175 break;
176 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
177 {
178 /*
179 * The first thing we check is if we've got an undetected conflict.
180 */
181 if (!pVM->pgm.s.fMappingsFixed)
182 {
183 unsigned iPT = pMapping->cPTs;
184 while (iPT-- > 0)
185 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
186 {
187 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
188 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
189 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
190 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
191 return VINF_PGM_SYNC_CR3;
192 }
193 }
194
195 /*
196 * Check if the fault address is in a virtual page access handler range.
197 */
198 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
199 if ( pCur
200 && pCur->enmType != PGMVIRTHANDLERTYPE_EIP
201 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
202 && ( uErr & X86_TRAP_PF_RW
203 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
204 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) ) /** r=bird: <- this is probably wrong. */
205 {
206# ifdef IN_GC
207 STAM_PROFILE_START(&pCur->Stat, h);
208 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
209 STAM_PROFILE_STOP(&pCur->Stat, h);
210# else
211 AssertFailed();
212 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
213# endif
214 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
215 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
216 return rc;
217 }
218
219 /*
220 * Check if the EIP is in a virtual page access handler range.
221 */
222 if (cpl == 0)
223 {
224 RTGCPTR pvEIP;
225 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &pvEIP);
226 if (VBOX_SUCCESS(rc))
227 {
228 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvEIP);
229 if ( pCur
230 && pCur->enmType == PGMVIRTHANDLERTYPE_EIP
231 && (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
232 {
233# ifdef IN_GC
234 STAM_PROFILE_START(&pCur->Stat, h);
235 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr);
236 STAM_PROFILE_STOP(&pCur->Stat, h);
237# else
238 AssertFailed();
239 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
240# endif
241 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
242 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
243 return rc;
244 }
245 }
246 }
247
248 /*
249 * Pretend we're not here and let the guest handle the trap.
250 */
251 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
252 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
253 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
254 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
255 return VINF_EM_RAW_GUEST_TRAP;
256 }
257 }
258 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
259 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
260# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
261
262 /*
263 * Check if this fault address is flagged for special treatment,
264 * which means we'll have to figure out the physical address and
265 * check flags associated with it.
266 *
267 * ASSUME that we can limit any special access handling to pages
268 * in page tables which the guest believes to be present.
269 */
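 /* The physical address is derived below in one of two ways: for a 4MB guest
  * page (PDE.PS set and CR4.PSE enabled) GCPhys is the PDE base plus the
  * 4KB-page-aligned part of the offset within the big page; for a normal 4KB
  * page the guest page table has to be mapped and its PTE read.  Without guest
  * paging the fault address already is the physical address. */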
270 if (PdeSrc.n.u1Present)
271 {
272 RTGCPHYS GCPhys = ~0U;
273
274# if PGM_WITH_PAGING(PGM_GST_TYPE)
275 uint32_t cr4 = CPUMGetGuestCR4(pVM);
276 if ( PdeSrc.b.u1Size
277 && (cr4 & X86_CR4_PSE))
278 GCPhys = (PdeSrc.u & X86_PDE4M_PG_MASK)
279 | ((RTGCPHYS)pvFault & (PAGE_OFFSET_MASK_BIG ^ PAGE_OFFSET_MASK));
280 else
281 {
282 PVBOXPT pPTSrc;
283# ifdef IN_GC
284 rc = PGMGCDynMapGCPage(pVM, PdeSrc.u & X86_PDE_PG_MASK, (void **)&pPTSrc);
285# else
286 pPTSrc = (PVBOXPT)MMPhysGCPhys2HCVirt(pVM, PdeSrc.u & X86_PDE_PG_MASK, sizeof(*pPTSrc));
287 if (pPTSrc == 0)
288 rc = VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS;
289# endif
290 if (VBOX_SUCCESS(rc))
291 {
292 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> PAGE_SHIFT) & PTE_MASK;
293 if (pPTSrc->a[iPTESrc].n.u1Present)
294 GCPhys = pPTSrc->a[iPTESrc].u & X86_PTE_PG_MASK;
295 }
296 }
297# else
298 /* No paging so the fault address is the physical address */
299 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
300# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
301
302 /*
303 * If we have a GC address we'll check if it has any flags set.
304 */
305 if (GCPhys != ~0U)
306 {
307 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
308
309 RTHCPHYS HCPhys;
310 rc = PGMRamGCPhys2HCPhysWithFlags(&pVM->pgm.s, GCPhys, &HCPhys);
311 if (VBOX_SUCCESS(rc))
312 {
313 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_HANDLER | MM_RAM_FLAGS_VIRTUAL_HANDLER))
314 {
315 if (HCPhys & MM_RAM_FLAGS_PHYSICAL_HANDLER)
316 {
317 /*
318 * Physical page access handler.
319 */
320 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
321 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
322 if (pCur)
323 {
324# ifdef PGM_SYNC_N_PAGES
325 /*
326 * If the region is write protected and we got a page not present fault, then sync
327 * the pages. If the fault was caused by a read, then restart the instruction.
328 * In case of write access continue to the GC write handler.
329 *
330 * ASSUMES that there is only one handler per page or that they have similar write properties.
331 */
332 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
333 && !(uErr & X86_TRAP_PF_P))
334 {
335 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
336 if ( VBOX_FAILURE(rc)
337 || !(uErr & X86_TRAP_PF_RW)
338 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
339 {
340 AssertRC(rc);
341 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
342 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
343 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
344 return rc;
345 }
346 }
347# endif
348
349 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
350 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
351 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, HCPhys, uErr, pCur->enmType));
352
353#ifdef IN_GC
354 Assert(CTXSUFF(pCur->pfnHandler));
355 STAM_PROFILE_START(&pCur->Stat, h);
356 rc = pCur->CTXSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXSUFF(pCur->pvUser));
357 STAM_PROFILE_STOP(&pCur->Stat, h);
358#elif IN_RING0
359 if (CTXALLSUFF(pCur->pfnHandler))
360 {
361 STAM_PROFILE_START(&pCur->Stat, h);
362 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
363 STAM_PROFILE_STOP(&pCur->Stat, h);
364 }
365 else
366 rc = VINF_EM_RAW_EMULATE_INSTR;
367#else
368 rc = VINF_EM_RAW_EMULATE_INSTR;
369#endif
370 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
371 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
372 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
373 return rc;
374 }
375 }
376# if PGM_WITH_PAGING(PGM_GST_TYPE)
377 else
378 {
379# ifdef PGM_SYNC_N_PAGES
380 /*
381 * If the region is write protected and we got a page not present fault, then sync
382 * the pages. If the fault was caused by a read, then restart the instruction.
383 * In case of write access continue to the GC write handler.
384 */
385 if ( (HCPhys & (MM_RAM_FLAGS_VIRTUAL_WRITE | MM_RAM_FLAGS_VIRTUAL_ALL)) == MM_RAM_FLAGS_VIRTUAL_WRITE
386 && !(uErr & X86_TRAP_PF_P))
387 {
388 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
389 if ( VBOX_FAILURE(rc)
390 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
391 || !(uErr & X86_TRAP_PF_RW))
392 {
393 AssertRC(rc);
394 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
395 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
396 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
397 return rc;
398 }
399 }
400# endif
401 /*
402 * Ok, it's a virtual page access handler.
403 *
404 * Since it's faster to search by address, we'll do that first
405 * and then retry by GCPhys if that fails.
406 */
407 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
408 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
409 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
410 */
411 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
412 if (pCur)
413 {
414 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
415 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
416 || !(uErr & X86_TRAP_PF_P)
417 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
418 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, HCPhys, uErr, pCur->enmType));
419
420 if ( pCur->enmType != PGMVIRTHANDLERTYPE_EIP
421 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
422 && ( uErr & X86_TRAP_PF_RW
423 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
424 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) ) /** @todo r=bird: _HYPERVISOR is impossible here because of mapping check. */
425 {
426# ifdef IN_GC
427 STAM_PROFILE_START(&pCur->Stat, h);
428 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
429 STAM_PROFILE_STOP(&pCur->Stat, h);
430# else
431 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
432# endif
433 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
434 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
435 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
436 return rc;
437 }
438 /* Unhandled part of a monitored page */
439 }
440 else
441 {
442 /* Check by physical address. */
443 PPGMVIRTHANDLER pCur;
444 unsigned iPage;
445 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
446 &pCur, &iPage);
447 Assert(VBOX_SUCCESS(rc) || !pCur);
448 if ( pCur
449 && pCur->enmType != PGMVIRTHANDLERTYPE_EIP
450 && ( uErr & X86_TRAP_PF_RW
451 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
452 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) )
453 {
454 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
455# ifdef IN_GC
456 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
457 Assert(off < pCur->cb);
458 STAM_PROFILE_START(&pCur->Stat, h);
459 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
460 STAM_PROFILE_STOP(&pCur->Stat, h);
461# else
462 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
463# endif
464 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
465 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
466 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
467 return rc;
468 }
469 }
470 }
471# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
472
473 /*
474 * There is a handled area of the page, but this fault doesn't belong to it.
475 * We must emulate the instruction.
476 *
477 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
478 * we first check if this was a page-not-present fault for a page with only
479 * write access handlers. Restart the instruction if it wasn't a write access.
480 */
481 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
482
483 if ( !(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL))
484 && !(uErr & X86_TRAP_PF_P))
485 {
486 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
487 if ( VBOX_FAILURE(rc)
488 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
489 || !(uErr & X86_TRAP_PF_RW))
490 {
491 AssertRC(rc);
492 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
493 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
494 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
495 return rc;
496 }
497 }
498
499 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
500 * It's writing to an unhandled part of the LDT page several million times.
501 */
502 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
503 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%VHp%s%s\n",
504 rc, HCPhys, HCPhys & MM_RAM_FLAGS_PHYSICAL_HANDLER ? " phys" : "",
505 HCPhys & MM_RAM_FLAGS_VIRTUAL_HANDLER ? " virt" : ""));
506 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
507 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
508 return rc;
509 } /* if any kind of handler */
510
511# if PGM_WITH_PAGING(PGM_GST_TYPE)
512 if (uErr & X86_TRAP_PF_P)
513 {
514 /*
515 * The page isn't marked, but it might still be monitored by a virtual page access handler.
516 * (ASSUMES no temporary disabling of virtual handlers.)
517 */
518 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
519 * we should correct both the shadow page table and physical memory flags, and not only check for
520 * accesses within the handler region but for access to pages with virtual handlers. */
521 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
522 if (pCur)
523 {
524 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
525 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
526 || !(uErr & X86_TRAP_PF_P)
527 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
528 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, HCPhys, uErr, pCur->enmType));
529
530 if ( pCur->enmType != PGMVIRTHANDLERTYPE_EIP
531 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
532 && ( uErr & X86_TRAP_PF_RW
533 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
534 && pCur->enmType != PGMVIRTHANDLERTYPE_HYPERVISOR) ) ) /** @todo r=bird: _HYPERVISOR is impossible here because of mapping check. */
535 {
536# ifdef IN_GC
537 STAM_PROFILE_START(&pCur->Stat, h);
538 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
539 STAM_PROFILE_STOP(&pCur->Stat, h);
540# else
541 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
542# endif
543 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
544 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
545 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
546 return rc;
547 }
548 }
549 }
550# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
551 }
552 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
553
554# ifdef PGM_OUT_OF_SYNC_IN_GC
555 /*
556 * We are here only if page is present in Guest page tables and trap is not handled
557 * by our handlers.
558 * Check it for page out-of-sync situation.
559 */
560 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
561
562 if (!(uErr & X86_TRAP_PF_P))
563 {
564 /*
565 * Page is not present in our page tables.
566 * Try to sync it!
567 * BTW, fPageShw is invalid in this branch!
568 */
569 if (uErr & X86_TRAP_PF_US)
570 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
571 else /* supervisor */
572 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
573
574# if defined(LOG_ENABLED) && !defined(IN_RING0)
575 RTGCPHYS GCPhys;
576 uint64_t fPageGst;
577 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
578 Log(("Page out of sync: %p eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
579 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)));
580# endif /* LOG_ENABLED */
581
582# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
583 if (cpl == 0)
584 {
585 uint64_t fPageGst;
586 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
587 if ( VBOX_SUCCESS(rc)
588 && !(fPageGst & X86_PTE_US))
589 {
590 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
591 if ( pvFault == (RTGCPTR)pRegFrame->eip
592 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
593# ifdef CSAM_DETECT_NEW_CODE_PAGES
594 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
595 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)) /* any new code we encounter here */
596# endif /* CSAM_DETECT_NEW_CODE_PAGES */
597 )
598 {
599 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
600 rc = CSAMExecFault(pVM, (RTGCPTR)pRegFrame->eip);
601 if (rc != VINF_SUCCESS)
602 {
603 /*
604 * CSAM needs to perform a job in ring 3.
605 *
606 * Sync the page before going to the host context; otherwise we'll end up in a loop if
607 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
608 */
609 LogFlow(("CSAM ring 3 job\n"));
610 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
611 AssertRC(rc2);
612
613 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
614 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
615 return rc;
616 }
617 }
618# ifdef CSAM_DETECT_NEW_CODE_PAGES
619 else
620 if ( uErr == X86_TRAP_PF_RW
621 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
622 && pRegFrame->ecx < 0x10000
623 )
624 {
625 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
626 * to detect loading of new code pages.
627 */
628
629 /*
630 * Decode the instruction.
631 */
632 RTGCPTR PC;
633 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
634 if (rc == VINF_SUCCESS)
635 {
636 DISCPUSTATE Cpu;
637 uint32_t cbOp;
638 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
639
640 /* For now we'll restrict this to rep movsw/d instructions */
641 if ( rc == VINF_SUCCESS
642 && Cpu.pCurInstr->opcode == OP_MOVSWD
643 && (Cpu.prefix & PREFIX_REP))
644 {
645 CSAMMarkPossibleCodePage(pVM, pvFault);
646 }
647 }
648 }
649# endif /* CSAM_DETECT_NEW_CODE_PAGES */
650
651 /*
652 * Mark this page as safe.
653 */
654 /** @todo not correct for pages that contain both code and data!! */
655 Log2(("CSAMMarkPage %p; scanned=%d\n", pvFault, true));
656 CSAMMarkPage(pVM, pvFault, true);
657 }
658 }
659# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
660 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
661 if (VBOX_SUCCESS(rc))
662 {
663 /* The page was successfully synced, return to the guest. */
664 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
665 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
666 return VINF_SUCCESS;
667 }
668 }
669 else
670 {
671 /*
672 * A side effect of not flushing global PDEs is out-of-sync pages due
673 * to physically monitored regions that are no longer valid.
674 * Assume for now this only applies to the read/write flag.
675 */
676 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
677 {
678 if (uErr & X86_TRAP_PF_US)
679 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
680 else /* supervisor */
681 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
682
683
684 /*
685 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
686 */
687 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
688 if (VBOX_SUCCESS(rc))
689 {
690 /*
691 * Page was successfully synced, return to guest.
692 */
693# ifdef VBOX_STRICT
694 RTGCPHYS GCPhys;
695 uint64_t fPageGst;
696 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
697 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
698 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
699
700 uint64_t fPageShw;
701 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
702 Assert(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW);
703# endif /* VBOX_STRICT */
704 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
705 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
706 return VINF_SUCCESS;
707 }
708 }
709
710# if PGM_WITH_PAGING(PGM_GST_TYPE)
711# ifdef VBOX_STRICT
712 /*
713 * Check for VMM page flags vs. Guest page flags consistency.
714 * Currently only for debug purposes.
715 */
716 if (VBOX_SUCCESS(rc))
717 {
718 /* Get guest page flags. */
719 uint64_t fPageGst;
720 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
721 if (VBOX_SUCCESS(rc))
722 {
723 uint64_t fPageShw;
724 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
725
726 /*
727 * Compare page flags.
728 * Note: we have AVL, A, D bits desynched.
729 */
730 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
731 ("Page flags mismatch! pvFault=%p GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
732 }
733 else
734 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
735 }
736 else
737 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
738# endif /* VBOX_STRICT */
739# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
740 }
741 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
742# endif /* PGM_OUT_OF_SYNC_IN_GC */
743 }
744 else
745 {
746 /*
747 * Page not present in Guest OS or invalid page table address.
748 * This is potential virtual page access handler food.
749 *
750 * For the present we'll say that our access handlers don't
751 * work for this case - we've already discarded the page table
752 * not present case which is identical to this.
753 *
754 * When we perchance find we need this, we will probably have AVL
755 * trees (offset based) to operate on and we can measure their speed
756 * against mapping a page table and probably rearrange this handling
757 * a bit. (Like, searching virtual ranges before checking the
758 * physical address.)
759 */
760 }
761 }
762
763
764# if PGM_WITH_PAGING(PGM_GST_TYPE)
765 /*
766 * Check if it's in a EIP based virtual page access handler range.
767 * This is only used for supervisor pages in flat mode.
768 */
769 /** @todo this stuff is completely broken by the out-of-sync stuff. since we don't use this stuff, that's not really a problem yet. */
770 STAM_PROFILE_START(&pVM->pgm.s.StatEIPHandlers, d);
771 if (cpl == 0)
772 {
773 RTGCPTR pvEIP;
774 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &pvEIP);
775 if ( VBOX_SUCCESS(rc)
776 && pvEIP == (RTGCPTR)pRegFrame->eip)
777 {
778 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvEIP);
779 if ( pCur
780 && pCur->enmType == PGMVIRTHANDLERTYPE_EIP
781 && (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
782 {
783 LogFlow(("EIP handler\n"));
784# ifdef IN_GC
785 STAM_PROFILE_START(&pCur->Stat, h);
786 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvEIP - (RTGCUINTPTR)pCur->GCPtr);
787 STAM_PROFILE_STOP(&pCur->Stat, h);
788# else
789 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
790# endif
791 STAM_PROFILE_STOP(&pVM->pgm.s.StatEIPHandlers, d);
792 return rc;
793 }
794 }
795 }
796 STAM_PROFILE_STOP(&pVM->pgm.s.StatEIPHandlers, d);
797
798 /*
799 * Conclusion, this is a guest trap.
800 */
801 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
802 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
803 return VINF_EM_RAW_GUEST_TRAP;
804# else
805 /* present, but not a monitored page; perhaps the guest is probing physical memory */
806 return VINF_EM_RAW_EMULATE_INSTR;
807# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
808
809
810#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
811
812 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
813 return VERR_INTERNAL_ERROR;
814#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
815}
816
817
818/**
819 * Emulation of the invlpg instruction.
820 *
821 *
822 * @returns VBox status code.
823 *
824 * @param pVM VM handle.
825 * @param GCPtrPage Page to invalidate.
826 *
827 * @remark ASSUMES that the guest is updating before invalidating. This order
828 * isn't required by the CPU, so this is speculative and could cause
829 * trouble.
830 *
831 * @todo Flush page or page directory only if necessary!
832 * @todo Add a #define for simply invalidating the page.
833 */
834PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
835{
836#if PGM_GST_TYPE == PGM_TYPE_32BIT
837
838 LogFlow(("InvalidatePage %x\n", GCPtrPage));
839# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
840 /*
841 * Get the shadow PD entry and skip out if this PD isn't present.
842 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
843 */
844 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
845# if PGM_SHW_TYPE == PGM_TYPE_32BIT
846 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
847# else
848 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
849# endif
850 const SHWPDE PdeDst = *pPdeDst;
851 if (!PdeDst.n.u1Present)
852 {
853 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
854 return VINF_SUCCESS;
855 }
856
857 /*
858 * Get the guest PD entry and calc big page.
859 */
860 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
861 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
862 VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
863 const uint32_t cr4 = CPUMGetGuestCR4(pVM);
864 const bool fIsBigPage = PdeSrc.b.u1Size && (cr4 & X86_CR4_PSE);
865
866# ifdef IN_RING3
867 /*
868 * If a CR3 Sync is pending we may ignore the invalidate page operation
869 * depending on the kind of sync and if it's a global page or not.
870 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
871 */
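 /* Rationale for the early-out below: if a (non-)global shadow sync is already
  * pending, the affected shadow page tables will be rebuilt anyway before the
  * guest resumes, so flushing this single page here would mostly be wasted work. */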
872# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
873 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
874 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
875 && fIsBigPage
876 && PdeSrc.b.u1Global
877 && (cr4 & X86_CR4_PGE)
878 )
879 )
880# else
881 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
882# endif
883 {
884 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
885 return VINF_SUCCESS;
886 }
887# endif /* IN_RING3 */
888
889
890 /*
891 * Deal with the Guest PDE.
892 */
893 int rc = VINF_SUCCESS;
894 if (PdeSrc.n.u1Present)
895 {
896 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
897 {
898 /*
899 * Conflict - Let SyncPT deal with it to avoid duplicate code.
900 */
901 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
902 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
903 }
904 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
905 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
906 {
907 /*
908 * Mark not present so we can resync the PDE when it's used.
909 */
910 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
911 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
912 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
913 pPdeDst->u = 0;
914 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
915 PGM_INVL_GUEST_TLBS();
916 }
917# ifdef PGM_SYNC_ACCESSED_BIT
918 else if (!PdeSrc.n.u1Accessed)
919 {
920 /*
921 * Mark not present so we can set the accessed bit.
922 */
923 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
924 pPdeDst->u = 0;
925 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
926 PGM_INVL_GUEST_TLBS();
927 }
928# endif
929 else if (!fIsBigPage)
930 {
931 /*
932 * 4KB - page.
933 */
934 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
935 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
936# if PGM_SHW_TYPE != PGM_TYPE_32BIT
937 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
938# endif
939 if (pShwPage->GCPhys == GCPhys)
940 {
941#if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
942 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
943 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
944 if (pPT->a[iPTEDst].n.u1Present)
945 {
946# ifdef PGMPOOL_WITH_USER_TRACKING
947 /* This is very unlikely with caching/monitoring enabled. */
948 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
949# endif
950 pPT->a[iPTEDst].u = 0;
951 }
952#else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
953 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
954 if (VBOX_SUCCESS(rc))
955 rc = VINF_SUCCESS;
956#endif
957 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
958 PGM_INVL_PG(GCPtrPage);
959 }
960 else
961 {
962 /*
963 * The page table address changed.
964 */
965 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
966 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
967 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
968 pPdeDst->u = 0;
969 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
970 PGM_INVL_GUEST_TLBS();
971 }
972 }
973 else
974 {
975 /*
976 * 4MB - page.
977 */
978 /* Before freeing the page, check if anything really changed. */
979 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
980 RTGCPHYS GCPhys = PdeSrc.u & X86_PDE4M_PG_MASK;
981# if PGM_SHW_TYPE != PGM_TYPE_32BIT
982 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
983# endif
984 if ( pShwPage->GCPhys == GCPhys
985 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
986 {
987 /* ASSUMES the given bits are identical for 4M and normal PDEs */
988 /** @todo PAT */
989# ifdef PGM_SYNC_DIRTY_BIT
990 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
991 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
992 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
993 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
994# else
995 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
996 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD)))
997# endif
998 {
999 LogFlow(("Skipping flush for big page containing %VGv (PD=%X)-> nothing has changed!\n", GCPtrPage, iPDSrc));
1000 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
1001 return VINF_SUCCESS;
1002 }
1003 }
1004
1005 /*
1006 * Ok, the page table is present and it's been changed in the guest.
1007 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1008 * We could do this for some flushes in GC too, but we need an algorithm for
1009 * deciding which 4MB pages containing code likely to be executed very soon.
1010 */
1011 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1012 pPdeDst->u = 0;
1013 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
1014 DUMP_PDE_BIG("PGMInvalidatePage", iPDSrc, PdeSrc);
1015 PGM_INVL_BIG_PG(GCPtrPage);
1016 }
1017 }
1018 else
1019 {
1020 /*
1021 * Page directory is not present, mark shadow PDE not present.
1022 */
1023 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1024 {
1025 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1026 pPdeDst->u = 0;
1027 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1028 PGM_INVL_PG(GCPtrPage);
1029 }
1030 else
1031 {
1032 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1033 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
1034 }
1035 }
1036
1037 return rc;
1038
1039# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1040# error "Guest 32-bit mode and shadow AMD64 mode doesn't add up!"
1041# endif
1042 return VINF_SUCCESS;
1043
1044#elif PGM_GST_TYPE == PGM_TYPE_PAE
1045# if PGM_SHW_TYPE == PGM_TYPE_PAE
1046//# error not implemented
1047 return VERR_INTERNAL_ERROR;
1048
1049# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1050# error "Guest PAE mode, but not the shadow mode ; 32bit - maybe, but amd64 no."
1051# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1052
1053#elif PGM_GST_TYPE == PGM_TYPE_AMD64
1054# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1055//# error not implemented
1056 return VERR_INTERNAL_ERROR;
1057
1058# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1059# error "Guest AMD64 mode, but not the shadow mode - that can't be right!"
1060# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
1061
1062#else /* guest real and protected mode */
1063 /* There's no such thing when paging is disabled. */
1064 return VINF_SUCCESS;
1065#endif
1066}
1067
1068
1069#ifdef PGMPOOL_WITH_USER_TRACKING
1070/**
1071 * Update the tracking of shadowed pages.
1072 *
1073 * @param pVM The VM handle.
1074 * @param pShwPage The shadow page.
1075 * @param HCPhys The physical page that is being dereferenced.
1076 */
1077DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1078{
1079# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1080 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1081 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1082
1083 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1084 * 1. have a medium sized HCPhys -> GCPhys cache (hash?)
1085 * 2. write protect all shadowed pages. I.e. implement caching.
1086 */
1087 /*
1088 * Find the guest address.
1089 */
1090 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTXSUFF(pRamRanges);
1091 pRam;
1092 pRam = pRam->CTXSUFF(pNext))
1093 {
1094 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1095 while (iPage-- > 0)
1096 {
1097 if ((pRam->aHCPhys[iPage] & X86_PTE_PAE_PG_MASK) == HCPhys)
1098 {
1099 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1100 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aHCPhys[iPage]);
1101 pShwPage->cPresent--;
1102 pPool->cPresent--;
1103 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1104 return;
1105 }
1106 }
1107 }
1108
1109 for (;;)
1110 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1111# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1112 pShwPage->cPresent--;
1113 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1114# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1115}
1116
1117
1118/**
1119 * Update the tracking of shadowed pages.
1120 *
1121 * @param pVM The VM handle.
1122 * @param pShwPage The shadow page.
1123 * @param u16 The top 16 bits of *pHCPhys.
1124 * @param pHCPhys Pointer to the ram range physical page entry.
1125 * @param iPTDst The index into the shadow table.
1126 */
1127DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PRTHCPHYS pHCPhys, const unsigned iPTDst)
1128{
1129# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1130 /*
1131 * We're making certain assumptions about the placement of cRefs and idx.
1132 */
1133 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1134 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
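 /*
  * Layout relied on below: the top 16 bits (63:48) of the ram range entry hold
  * the tracking state -- the shadow pool page index in the low bits, with the
  * reference count shifted up by (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT).
  * The first reference is recorded inline; additional ones are handed to
  * pgmPoolTrackPhysExtAddref.  The write-back only touches that 16-bit word,
  * i.e. ((uint16_t *)pHCPhys + 3) on a little-endian host.
  */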
1135
1136 /*
1137 * Just deal with the simple first time here.
1138 */
1139 if (!u16)
1140 {
1141 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1142 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1143 }
1144 else
1145 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1146
1147 /* write back, trying to be clever... */
1148 Log2(("SyncPageWorkerTrackAddRef: u16=%#x *pHCPhys=%VHp->%VHp iPTDst=%#x\n",
1149 u16, *pHCPhys, (*pHCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1150 *((uint16_t *)pHCPhys + 3) = u16;
1151# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1152
1153 /* update statistics. */
1154 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1155 pShwPage->cPresent++;
1156 if (pShwPage->iFirstPresent > iPTDst)
1157 pShwPage->iFirstPresent = iPTDst;
1158}
1159#endif /* PGMPOOL_WITH_USER_TRACKING */
1160
1161
1162/**
1163 * Creates a 4K shadow page for a guest page.
1164 *
1165 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1166 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1167 * will be mapped in this function.
1168 *
1169 * @param pVM VM handle.
1170 * @param pPteDst Destination page table entry.
1171 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1172 * Can safely assume that only the flags are being used.
1173 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1174 * @param pShwPage Pointer to the shadow page.
1175 * @param iPTDst The index into the shadow table.
1176 *
1177 * @remark Not used for 2/4MB pages!
1178 */
1179DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, VBOXPDE PdeSrc, VBOXPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1180{
1181 if (PteSrc.n.u1Present)
1182 {
1183 /*
1184 * Find the ram range.
1185 */
1186 PRTHCPHYS pHCPhys;
1187 int rc = PGMRamGCPhys2PagePtr(&pVM->pgm.s, PteSrc.u & X86_PTE_PG_MASK, &pHCPhys);
1188 if (VBOX_SUCCESS(rc))
1189 {
1190 /** @todo investigate PWT, PCD and PAT. */
1191 /*
1192 * Make page table entry.
1193 */
1194 const RTHCPHYS HCPhys = *pHCPhys;
1195 SHWPTE PteDst;
1196 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
1197 {
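 /* Two cases below: a page with only write handlers keeps a present,
  * read-only shadow PTE (X86_PTE_RW is masked out), so reads run
  * unhindered and only writes trap; a page with "all"-access handlers
  * gets no shadow PTE at all, so every access faults into the handler. */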
1198 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1199 if (!(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL)))
1200 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1201 | (HCPhys & X86_PTE_PAE_PG_MASK);
1202 else
1203 PteDst.u = 0;
1204 /** @todo count these two kinds. */
1205 }
1206 else
1207 {
1208#ifdef PGM_SYNC_DIRTY_BIT
1209# ifdef PGM_SYNC_ACCESSED_BIT
1210 /*
1211 * If the page or page directory entry is not marked accessed,
1212 * we mark the page not present.
1213 */
1214 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1215 {
1216 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1217 PteDst.u = 0;
1218 }
1219 else
1220# endif
1221 /*
1222 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1223 * when the page is modified.
1224 */
1225 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1226 {
1227 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1228 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1229 | (HCPhys & X86_PTE_PAE_PG_MASK)
1230 | PGM_PTFLAGS_TRACK_DIRTY;
1231 }
1232 else
1233 {
1234 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1235 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1236 | (HCPhys & X86_PTE_PAE_PG_MASK);
1237 }
1238#endif
1239 }
1240
1241#ifdef PGMPOOL_WITH_USER_TRACKING
1242 /*
1243 * Keep user track up to date.
1244 */
1245 if (PteDst.n.u1Present)
1246 {
1247 if (!pPteDst->n.u1Present)
1248 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pHCPhys, iPTDst);
1249 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1250 {
1251 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1252 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1253 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pHCPhys, iPTDst);
1254 }
1255 }
1256 else if (pPteDst->n.u1Present)
1257 {
1258 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1259 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1260 }
1261#endif /* PGMPOOL_WITH_USER_TRACKING */
1262
1263 /*
1264 * Update statistics and commit the entry.
1265 */
1266 if (!PteSrc.n.u1Global)
1267 pShwPage->fSeenNonGlobal = true;
1268 *pPteDst = PteDst;
1269 }
1270 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1271 /** @todo count these. */
1272 }
1273 else
1274 {
1275 /*
1276 * Page not-present.
1277 */
1278#ifdef PGMPOOL_WITH_USER_TRACKING
1279 /* Keep user track up to date. */
1280 if (pPteDst->n.u1Present)
1281 {
1282 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1283 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1284 }
1285#endif /* PGMPOOL_WITH_USER_TRACKING */
1286 pPteDst->u = 0;
1287 /** @todo count these. */
1288 }
1289}
1290
1291
1292/**
1293 * Syncs a guest OS page.
1294 *
1295 * There are no conflicts at this point, neither is there any need for
1296 * page table allocations.
1297 *
1298 * @returns VBox status code.
1299 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1300 * @param pVM VM handle.
1301 * @param PdeSrc Page directory entry of the guest.
1302 * @param GCPtrPage Guest context page address.
1303 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1304 * @param uErr Fault error (X86_TRAP_PF_*).
1305 */
1306PGM_BTH_DECL(int, SyncPage)(PVM pVM, VBOXPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1307{
1308 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1309
1310#if PGM_GST_TYPE == PGM_TYPE_32BIT
1311
1312# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
1313# error "Invalid shadow mode for 32-bit guest mode!"
1314# endif
1315
1316 /*
1317 * Assert preconditions.
1318 */
1319# if GC_ARCH_BITS != 32
1320 Assert(GCPtrPage < _4G); //???
1321# endif
1322 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> X86_PD_SHIFT) & X86_PD_MASK]);
1323 Assert(PdeSrc.n.u1Present);
1324 Assert(cPages);
1325
1326 /*
1327 * Get the shadow PDE, find the shadow page table in the pool.
1328 */
1329 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1330# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1331 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1332# else /* PAE */
1333 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1334# endif
1335 Assert(PdeDst.n.u1Present);
1336 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1337
1338 /*
1339 * Check that the page is present and that the shadow PDE isn't out of sync.
1340 */
1341 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1342 RTGCPHYS GCPhys;
1343 if (!fBigPage)
1344 {
1345 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1346# if PGM_SHW_TYPE != PGM_TYPE_32BIT
1347 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1348# endif
1349 }
1350 else
1351 {
1352 GCPhys = PdeSrc.u & GST_PDE4M_PG_MASK;
1353# if PGM_SHW_TYPE != PGM_TYPE_32BIT
1354 GCPhys |= GCPtrPage & X86_PAGE_2M_SIZE;
1355# endif
1356 }
1357 if ( pShwPage->GCPhys == GCPhys
1358 && PdeSrc.n.u1Present
1359 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1360 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1361 )
1362 {
1363# ifdef PGM_SYNC_ACCESSED_BIT
1364 /*
1365 * Check that the PDE is marked accessed already.
1366 * Since we set the accessed bit *before* getting here on a #PF, this
1367 * check is only meant for dealing with non-#PF'ing paths.
1368 */
1369 if (PdeSrc.n.u1Accessed)
1370# endif
1371 {
1372 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1373 if (!fBigPage)
1374 {
1375 /*
1376 * 4KB Page - Map the guest page table.
1377 */
1378 PVBOXPT pPTSrc;
1379 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & X86_PDE_PG_MASK, &pPTSrc);
1380 if (VBOX_SUCCESS(rc))
1381 {
1382# ifdef PGM_SYNC_N_PAGES
1383 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1384 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1385 {
1386 /*
1387 * This code path is currently only taken when the caller is PGMTrap0eHandler
1388 * for non-present pages!
1389 *
1390 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1391 * deal with locality.
1392 */
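 /* The loop below syncs a window of PGM_SYNC_NR_PAGES entries centred on the
  * faulting page, clipped to this page table, and skips entries that are
  * already present so nothing previously synced is clobbered. */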
1393 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1394# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1395 const unsigned offPTSrc = 0;
1396# else
1397 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1398# endif
1399 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1400 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1401 iPTDst = 0;
1402 else
1403 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1404 for (; iPTDst < iPTDstEnd; iPTDst++)
1405 {
1406 if (!pPTDst->a[iPTDst].n.u1Present)
1407 {
1408 VBOXPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1409 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(X86_PT_MASK << X86_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1410 NOREF(GCPtrCurPage);
1411#ifndef IN_RING0
1412 /*
1413 * Assuming kernel code will be marked as supervisor - and not as user level
1414 * and executed using a conforming code selector - and marked as read-only.
1415 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1416 */
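 /* Hence the guard below: the entry is synced right away if both PDE and PTE
  * grant write or user access, if it is the faulting page itself, if CSAM does
  * not need to scan it, or if the page carries access handler flags; otherwise
  * it is left not-present so CSAM sees it fault first. */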
1417 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1418 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1419 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)GCPtrCurPage)
1420 || PGMRamTestFlags(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK,
1421 MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)
1422 )
1423#endif
1424 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1425 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1426 GCPtrCurPage, PteSrc.n.u1Present,
1427 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1428 PteSrc.n.u1User & PdeSrc.n.u1User,
1429 (uint64_t)PteSrc.u,
1430 (uint64_t)pPTDst->a[iPTDst].u,
1431 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1432 }
1433 }
1434 }
1435 else
1436# endif /* PGM_SYNC_N_PAGES */
1437 {
1438 const unsigned iPTSrc = (GCPtrPage >> X86_PT_SHIFT) & X86_PT_MASK;
1439 VBOXPTE PteSrc = pPTSrc->a[iPTSrc];
1440 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1441 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1442 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1443 GCPtrPage, PteSrc.n.u1Present,
1444 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1445 PteSrc.n.u1User & PdeSrc.n.u1User,
1446 (uint64_t)PteSrc.u,
1447 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1448 }
1449 }
1450 else /* MMIO or invalid page: emulated in #PF handler. */
1451 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1452 }
1453 else
1454 {
1455 /*
1456 * 4/2MB page - lazy syncing shadow 4K pages.
1457 * (There are many causes of getting here, it's no longer only CSAM.)
1458 */
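 /* Only the 4KB slice of the big page that GCPtrPage falls into is shadowed
  * here: its GC physical address is the 2/4MB base from the PDE plus the
  * offset within the big page, and the PDE flags double as the flags of the
  * shadow PTE built below. */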
1459 /* Calculate the GC physical address of this 4KB shadow page. */
1460 RTGCPHYS GCPhys = (PdeSrc.u & X86_PDE4M_PAE_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & PAGE_OFFSET_MASK_BIG);
1461 /* Find ram range. */
1462 PRTHCPHYS pHCPhys;
1463 int rc = PGMRamGCPhys2PagePtr(&pVM->pgm.s, GCPhys, &pHCPhys);
1464 if (VBOX_SUCCESS(rc))
1465 {
1466 /*
1467 * Make shadow PTE entry.
1468 */
1469 RTHCPHYS HCPhys = *pHCPhys;
1470 SHWPTE PteDst;
1471 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1472 | (HCPhys & X86_PTE_PAE_PG_MASK);
1473 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
1474 {
1475 if (!(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL)))
1476 PteDst.n.u1Write = 0;
1477 else
1478 PteDst.u = 0;
1479 }
1480 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1481# ifdef PGMPOOL_WITH_USER_TRACKING
1482 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1483 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pHCPhys, iPTDst);
1484# endif
1485 pPTDst->a[iPTDst] = PteDst;
1486
1487
1488# ifdef PGM_SYNC_DIRTY_BIT
1489 /*
1490 * If the page is not flagged as dirty and is writable, then make it read-only
1491 * at PD level, so we can set the dirty bit when the page is modified.
1492 *
1493 * ASSUMES that page access handlers are implemented on page table entry level.
1494 * Thus we will first catch the dirty access and set PDE.D and restart. If
1495 * there is an access handler, we'll trap again and let it work on the problem.
1496 */
1497 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1498 * As for invlpg, it simply frees the whole shadow PT.
1499 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1500 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1501 {
1502 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1503 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1504 PdeDst.n.u1Write = 0;
1505 }
1506 else
1507 {
1508 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1509 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1510 }
1511# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1512 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1513# else /* PAE */
1514 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1515# endif
1516# endif /* PGM_SYNC_DIRTY_BIT */
1517 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1518 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1519 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1520 }
1521 }
1522 return VINF_SUCCESS;
1523 }
1524# ifdef PGM_SYNC_ACCESSED_BIT
1525 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1526# endif
1527 }
1528 else
1529 {
1530 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1531 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1532 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1533 }
1534
1535 /*
1536 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1537 * Yea, I'm lazy.
1538 */
1539 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1540# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1541 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1542# else /* PAE */
1543 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1544# endif
1545 PGM_INVL_GUEST_TLBS();
1546 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1547
1548#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
1549
1550# ifdef PGM_SYNC_N_PAGES
1551 /*
1552 * Get the shadow PDE, find the shadow page table in the pool.
1553 */
1554 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1555# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1556 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1557# else /* PAE */
1558 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1559# endif
1560 Assert(PdeDst.n.u1Present);
1561 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1562 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1563
1564# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1565 const unsigned offPTSrc = 0;
1566# else
1567 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1568# endif
1569
1570 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1571 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1572 {
1573 /*
1574 * This code path is currently only taken when the caller is PGMTrap0eHandler
1575 * for non-present pages!
1576 *
1577 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1578 * deal with locality.
1579 */
1580 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1581 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1582 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1583 iPTDst = 0;
1584 else
1585 iPTDst -= PGM_SYNC_NR_PAGES / 2;
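 /* Worked example, assuming PGM_SYNC_NR_PAGES is 8: a fault on page table index 3 gives
 * iPTDstEnd = min(3 + 4, ELEMENTS(pPTDst->a)) = 7 and iPTDst = 0, so entries 0..6 around
 * the faulting page are examined below. */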
1586 for (; iPTDst < iPTDstEnd; iPTDst++)
1587 {
1588 if (!pPTDst->a[iPTDst].n.u1Present)
1589 {
1590 VBOXPTE PteSrc;
1591
1592 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(X86_PT_MASK << X86_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1593
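 /* Real/protected mode has no guest page tables; the entry faked below simply identity
 * maps the linear address (guest-physical == linear address in these modes). */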
1594 /* Fake the page table entry */
1595 PteSrc.u = GCPtrCurPage;
1596 PteSrc.n.u1Present = 1;
1597 PteSrc.n.u1Dirty = 1;
1598 PteSrc.n.u1Accessed = 1;
1599 PteSrc.n.u1Write = 1;
1600 PteSrc.n.u1User = 1;
1601
1602 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1603
1604 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1605 GCPtrCurPage, PteSrc.n.u1Present,
1606 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1607 PteSrc.n.u1User & PdeSrc.n.u1User,
1608 (uint64_t)PteSrc.u,
1609 (uint64_t)pPTDst->a[iPTDst].u,
1610 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1611 }
1612 }
1613 }
1614 else
1615# endif /* PGM_SYNC_N_PAGES */
1616 {
1617 VBOXPTE PteSrc;
1618 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1619 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(X86_PT_MASK << X86_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1620
1621 /* Fake the page table entry */
1622 PteSrc.u = GCPtrCurPage;
1623 PteSrc.n.u1Present = 1;
1624 PteSrc.n.u1Dirty = 1;
1625 PteSrc.n.u1Accessed = 1;
1626 PteSrc.n.u1Write = 1;
1627 PteSrc.n.u1User = 1;
1628 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1629
1630 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1631 GCPtrPage, PteSrc.n.u1Present,
1632 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1633 PteSrc.n.u1User & PdeSrc.n.u1User,
1634 (uint64_t)PteSrc.u,
1635 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1636 }
1637 return VINF_SUCCESS;
1638
1639#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1640 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1641 return VERR_INTERNAL_ERROR;
1642#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1643}
1644
1645
1646
1647#if PGM_WITH_PAGING(PGM_GST_TYPE)
1648
1649# ifdef PGM_SYNC_DIRTY_BIT
1650
1651/**
1652 * Investigate page fault and handle write protection page faults caused by
1653 * dirty bit tracking.
1654 *
1655 * @returns VBox status code.
1656 * @param pVM VM handle.
1657 * @param uErr Page fault error code.
1658 * @param pPdeDst Shadow page directory entry.
1659 * @param pPdeSrc Guest page directory entry.
1660 * @param GCPtrPage Guest context page address.
1661 */
1662PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PVBOXPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1663{
1664 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1665 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1666
1667 /*
1668 * Real page fault?
1669 */
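 /* I.e. a not-present guest PDE, a write to a read-only PDE or a user-mode access to a
 * supervisor PDE is a genuine guest fault and is reflected as VINF_EM_RAW_GUEST_TRAP below. */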
1670 if ( (uErr & X86_TRAP_PF_RSVD)
1671 || !pPdeSrc->n.u1Present
1672 || ((uErr & X86_TRAP_PF_RW) && !pPdeSrc->n.u1Write)
1673 || ((uErr & X86_TRAP_PF_US) && !pPdeSrc->n.u1User) )
1674 {
1675# ifdef IN_GC
1676 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1677# endif
1678 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1679 LogFlow(("CheckPageFault: real page fault at %VGv (1)\n", GCPtrPage));
1680
1681 if (pPdeSrc->n.u1Present)
1682 {
1683 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1684 * See the 2nd case below as well.
1685 */
1686 if (pPdeSrc->b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE))
1687 {
1688 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1689 }
1690 else
1691 {
1692 /*
1693 * Map the guest page table.
1694 */
1695 PVBOXPT pPTSrc;
1696 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & X86_PDE_PG_MASK, &pPTSrc);
1697 if (VBOX_SUCCESS(rc))
1698 {
1699 PVBOXPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> PAGE_SHIFT) & PTE_MASK];
1700 const VBOXPTE PteSrc = *pPteSrc;
1701 if (pPteSrc->n.u1Present)
1702 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1703 }
1704 AssertRC(rc);
1705 }
1706 }
1707 return VINF_EM_RAW_GUEST_TRAP;
1708 }
1709
1710 /*
1711 * First check the easy case where the page directory has been marked read-only to track
1712 * the dirty bit of an emulated BIG page
1713 */
1714 if (pPdeSrc->b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE))
1715 {
1716 /* Mark guest page directory as accessed */
1717 pPdeSrc->b.u1Accessed = 1;
1718
1719 /*
1720 * Only write protection page faults are relevant here.
1721 */
1722 if (uErr & X86_TRAP_PF_RW)
1723 {
1724 /* Mark guest page directory as dirty (BIG page only). */
1725 pPdeSrc->b.u1Dirty = 1;
1726
1727 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1728 {
1729 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1730
1731 Assert(pPdeSrc->b.u1Write);
1732
1733 pPdeDst->n.u1Write = 1;
1734 pPdeDst->n.u1Accessed = 1;
1735 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1736 PGM_INVL_BIG_PG(GCPtrPage);
1737 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1738 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1739 }
1740 }
1741 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1742 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1743 }
1744 /* else: 4KB page table */
1745
1746 /*
1747 * Map the guest page table.
1748 */
1749 PVBOXPT pPTSrc;
1750 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & X86_PDE_PG_MASK, &pPTSrc);
1751 if (VBOX_SUCCESS(rc))
1752 {
1753 /*
1754 * Real page fault?
1755 */
1756 PVBOXPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> PAGE_SHIFT) & PTE_MASK];
1757 const VBOXPTE PteSrc = *pPteSrc;
1758 if ( !PteSrc.n.u1Present
1759 || ((uErr & X86_TRAP_PF_RW) && !PteSrc.n.u1Write)
1760 || ((uErr & X86_TRAP_PF_US) && !PteSrc.n.u1User)
1761 )
1762 {
1763# ifdef IN_GC
1764 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1765# endif
1766 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1767 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
1768
1769 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1770 * See the 2nd case above as well.
1771 */
1772 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
1773 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1774
1775 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1776 return VINF_EM_RAW_GUEST_TRAP;
1777 }
1778 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
1779
1780 /*
1781 * Set the accessed bits in the page directory and the page table.
1782 */
1783 pPdeSrc->n.u1Accessed = 1;
1784 pPteSrc->n.u1Accessed = 1;
1785
1786 /*
1787 * Only write protection page faults are relevant here.
1788 */
1789 if (uErr & X86_TRAP_PF_RW)
1790 {
1791 /* Write access, so mark guest entry as dirty. */
1792# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
1793 if (!pPteSrc->n.u1Dirty)
1794 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
1795 else
1796 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
1797# endif
1798 pPteSrc->n.u1Dirty = 1;
1799
1800 if (pPdeDst->n.u1Present)
1801 {
1802 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
1803 * Our individual shadow handlers will provide more information and force a fatal exit.
1804 */
1805 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
1806 {
1807 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
1808 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1809 return VINF_SUCCESS;
1810 }
1811
1812 /*
1813 * Map shadow page table.
1814 */
1815 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1816 if (pShwPage)
1817 {
1818 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1819 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1820 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
1821 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
1822 {
1823 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
1824# ifdef VBOX_STRICT
1825 RTHCPHYS HCPhys;
1826 rc = PGMRamGCPhys2HCPhysWithFlags(&pVM->pgm.s, pPteSrc->u & X86_PTE_PG_MASK, &HCPhys);
1827 if (VBOX_SUCCESS(rc))
1828 AssertMsg(!(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)),
1829 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
1830# endif
1831 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1832
1833 Assert(pPteSrc->n.u1Write);
1834
1835 pPteDst->n.u1Write = 1;
1836 pPteDst->n.u1Dirty = 1;
1837 pPteDst->n.u1Accessed = 1;
1838 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
1839 PGM_INVL_PG(GCPtrPage);
1840
1841 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1842 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1843 }
1844 }
1845 else
1846 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
1847 }
1848 }
1849/** @todo Optimize accessed bit emulation? */
1850# ifdef VBOX_STRICT
1851 /*
1852 * Sanity check.
1853 */
1854 else if ( !pPteSrc->n.u1Dirty
1855 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
1856 && pPdeDst->n.u1Present)
1857 {
1858 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1859 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1860 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1861 if ( pPteDst->n.u1Present
1862 && pPteDst->n.u1Write)
1863 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
1864 }
1865# endif /* VBOX_STRICT */
1866 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1867 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1868 }
1869 AssertRC(rc);
1870 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1871 return rc;
1872}
1873
1874# endif
1875
1876#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
1877
1878
1879/**
1880 * Sync a shadow page table.
1881 *
1882 * The shadow page table is not present. This includes the case where
1883 * there is a conflict with a mapping.
1884 *
1885 * @returns VBox status code.
1886 * @param pVM VM handle.
1887 * @param iPDSrc Page directory index.
1888 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
1889 * Assume this is a temporary mapping.
1890 * @param GCPtrPage GC Pointer of the page that caused the fault
1891 */
1892PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PVBOXPD pPDSrc, RTGCUINTPTR GCPtrPage)
1893{
1894 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1895 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
1896 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
1897
1898#if PGM_GST_TYPE == PGM_TYPE_32BIT
1899
1900# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
1901# error "Invalid shadow mode for 32-bit guest mode!"
1902# endif
1903
1904 /*
1905 * Validate input a little bit.
1906 */
1907 Assert(iPDSrc == (GCPtrPage >> GST_PD_SHIFT));
1908# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1909 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
1910# else
1911 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
1912# endif
1913 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1914 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
1915 SHWPDE PdeDst = *pPdeDst;
1916
1917 /*
1918 * Check for conflicts.
1919 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
1920 * HC: Simply resolve the conflict.
1921 */
1922 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1923 {
1924 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1925# ifndef IN_RING3
1926 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
1927 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1928 return VERR_ADDRESS_CONFLICT;
1929# else
1930 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
1931 Assert(pMapping);
1932 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPDSrc);
1933 if (VBOX_FAILURE(rc))
1934 {
1935 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
1936 return rc;
1937 }
1938 PdeDst = *pPdeDst;
1939# endif
1940 }
1941 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
1942
1943 /*
1944 * Sync page directory entry.
1945 */
1946 int rc = VINF_SUCCESS;
1947 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1948 if (PdeSrc.n.u1Present)
1949 {
1950 /*
1951 * Allocate & map the page table.
1952 */
1953 PSHWPT pPTDst;
1954 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1955 PPGMPOOLPAGE pShwPage;
1956 RTGCPHYS GCPhys;
1957 if (fPageTable)
1958 {
1959 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1960# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1961 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1962# endif
1963 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
1964 }
1965 else
1966 {
1967 GCPhys = PdeSrc.u & GST_PDE4M_PG_MASK;
1968# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1969 GCPhys |= GCPtrPage & BIT(X86_PAGE_2M_SHIFT);
1970# endif
1971 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
1972 }
1973 if (rc == VINF_SUCCESS)
1974 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1975 else if (rc == VINF_PGM_CACHED_PAGE)
1976 {
1977 /*
1978 * The PT was cached, just hook it up.
1979 */
1980 if (fPageTable)
1981 PdeDst.u = pShwPage->Core.Key
1982 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1983 else
1984 {
1985 PdeDst.u = pShwPage->Core.Key
1986 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
1987# ifdef PGM_SYNC_DIRTY_BIT /* (see explanation and assumptions further down.) */
1988 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1989 {
1990 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1991 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1992 PdeDst.b.u1Write = 0;
1993 }
1994# endif
1995 }
1996 *pPdeDst = PdeDst;
1997 return VINF_SUCCESS;
1998 }
1999 else if (rc == VERR_PGM_POOL_FLUSHED)
2000 return VINF_PGM_SYNC_CR3;
2001 else
2002 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2003 PdeDst.u &= X86_PDE_AVL_MASK;
2004 PdeDst.u |= pShwPage->Core.Key;
2005
2006# ifdef PGM_SYNC_DIRTY_BIT
2007 /*
2008 * Page directory has been accessed (this is a fault situation, remember).
2009 */
2010 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2011# endif
2012 if (fPageTable)
2013 {
2014 /*
2015 * Page table - 4KB.
2016 *
2017 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2018 */
2019 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2020 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2021 PGSTPT pPTSrc;
2022 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2023 if (VBOX_SUCCESS(rc))
2024 {
2025 /*
2026 * Start by syncing the page directory entry so CSAM's TLB trick works.
2027 */
2028 PdeDst.u = (PdeDst.u & (X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK))
2029 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2030 *pPdeDst = PdeDst;
2031
2032 /*
2033 * Directory/page user or supervisor privilege: (same goes for read/write)
2034 *
2035 * Directory Page Combined
2036 * U/S U/S U/S
2037 * 0 0 0
2038 * 0 1 0
2039 * 1 0 0
2040 * 1 1 1
2041 *
2042 * Simple AND operation. Table listed for completeness.
2043 *
2044 */
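 /* In code this is just a bitwise AND of the two entries, e.g. the
 * (PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US) test in the CSAM check below. */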
2045 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
2046# ifdef PGM_SYNC_N_PAGES
2047 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2048 unsigned iPTDst = iPTBase;
2049 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
2050 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2051 iPTDst = 0;
2052 else
2053 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2054# else /* !PGM_SYNC_N_PAGES */
2055 unsigned iPTDst = 0;
2056 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2057# endif /* !PGM_SYNC_N_PAGES */
2058# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2059 const unsigned offPTSrc = 0;
2060# else
2061 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2062# endif
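 /* A PAE shadow page table only covers 2MB, i.e. half of the 4MB spanned by a 32-bit
 * guest page table, so the odd half of the guest PT starts at source index 512. */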
2063 for (; iPTDst < iPTDstEnd; iPTDst++)
2064 {
2065 const unsigned iPTSrc = iPTDst + offPTSrc;
2066 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2067
2068 if (PteSrc.n.u1Present) /* we've already cleared it above */
2069 {
2070#ifndef IN_RING0
2071 /*
2072 * Assuming kernel code will be marked as supervisor - and not as user level
2073 * and executed using a conforming code selector - and marked as read-only.
2074 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2075 */
2076 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2077 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2078 || PGMRamTestFlags(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK,
2079 MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE)
2080 )
2081#endif
2082 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2083 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2084 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2085 PteSrc.n.u1Present,
2086 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2087 PteSrc.n.u1User & PdeSrc.n.u1User,
2088 (uint64_t)PteSrc.u,
2089 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2090 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2091 }
2092 } /* for PTEs */
2093 }
2094 }
2095 else
2096 {
2097 /*
2098 * Big page - 2/4MB.
2099 *
2100 * We'll walk the ram range list in parallel and optimize lookups.
2101 * We will only sync one shadow page table at a time.
2102 */
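 /* Sketch of the loop below: GCPhys advances one page per shadow PTE while pRam tracks
 * the ram range containing it; PTEs falling outside any range (MMIO or invalid pages)
 * are left not present so the #PF handler can emulate the access. */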
2103 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2104
2105 /**
2106 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4KB PDs).
2107 */
2108
2109 /*
2110 * Start by syncing the page directory entry.
2111 */
2112 PdeDst.u = (PdeDst.u & (X86_PDE_PAE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2113 | (PdeSrc.u & ~(X86_PDE_PAE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2114
2115# ifdef PGM_SYNC_DIRTY_BIT
2116 /*
2117 * If the page is not flagged as dirty and is writable, then make it read-only
2118 * at PD level, so we can set the dirty bit when the page is modified.
2119 *
2120 * ASSUMES that page access handlers are implemented on page table entry level.
2121 * Thus we will first catch the dirty access and set PDE.D and restart. If
2122 * there is an access handler, we'll trap again and let it work on the problem.
2123 */
2124 /** @todo move the above stuff to a section in the PGM documentation. */
2125 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2126 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2127 {
2128 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2129 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2130 PdeDst.b.u1Write = 0;
2131 }
2132# endif /* PGM_SYNC_DIRTY_BIT */
2133 *pPdeDst = PdeDst;
2134
2135 /*
2136 * Fill the shadow page table.
2137 */
2138 /* Get address and flags from the source PDE. */
2139 SHWPTE PteDstBase;
2140 PteDstBase.u = PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2141
2142 /* Loop thru the entries in the shadow PT. */
2143 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2144 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2145 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2146 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2147 PPGMRAMRANGE pRam = CTXSUFF(pVM->pgm.s.pRamRanges);
2148 unsigned iPTDst = 0;
2149 while (iPTDst < ELEMENTS(pPTDst->a))
2150 {
2151 /* Advance ram range list. */
2152 while (pRam && GCPhys > pRam->GCPhysLast)
2153 pRam = CTXSUFF(pRam->pNext);
2154 if (pRam && GCPhys >= pRam->GCPhys)
2155 {
2156 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2157 do
2158 {
2159 /* Make shadow PTE. */
2160 RTHCPHYS HCPhys = pRam->aHCPhys[iHCPage];
2161 SHWPTE PteDst;
2162
2163 /* Make sure the RAM has already been allocated. */
2164 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC)
2165 {
2166 if (RT_UNLIKELY(!(pRam->aHCPhys[iHCPage] & X86_PTE_PAE_PG_MASK)))
2167 {
2168# ifdef IN_RING3
2169 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2170# else
2171 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2172# endif
2173 if (rc != VINF_SUCCESS)
2174 return rc;
2175
2176 HCPhys = pRam->aHCPhys[iHCPage];
2177 }
2178 }
2179
2180 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
2181 {
2182 if (!(HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL)))
2183 {
2184 PteDst.u = (HCPhys & X86_PTE_PAE_PG_MASK) | PteDstBase.u;
2185 PteDst.n.u1Write = 0;
2186 }
2187 else
2188 PteDst.u = 0;
2189 }
2190# ifndef IN_RING0
2191 /*
2192 * Assuming kernel code will be marked as supervisor and not as user level and executed
2193 * using a conforming code selector. Don't check for readonly, as that implies the whole
2194 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2195 */
2196 else if ( !PdeSrc.n.u1User
2197 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2198 PteDst.u = 0;
2199# endif
2200 else
2201 PteDst.u = (HCPhys & X86_PTE_PAE_PG_MASK) | PteDstBase.u;
2202# ifdef PGMPOOL_WITH_USER_TRACKING
2203 if (PteDst.n.u1Present)
2204 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, &pRam->aHCPhys[iHCPage], iPTDst);
2205# endif
2206 /* commit it */
2207 pPTDst->a[iPTDst] = PteDst;
2208 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2209 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2210 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2211
2212 /* advance */
2213 GCPhys += PAGE_SIZE;
2214 iHCPage++;
2215 iPTDst++;
2216 } while ( iPTDst < ELEMENTS(pPTDst->a)
2217 && GCPhys <= pRam->GCPhysLast);
2218 }
2219 else if (pRam)
2220 {
2221 Log(("Invalid pages at %VGp\n", GCPhys));
2222 do
2223 {
2224 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2225 GCPhys += PAGE_SIZE;
2226 iPTDst++;
2227 } while ( iPTDst < ELEMENTS(pPTDst->a)
2228 && GCPhys < pRam->GCPhys);
2229 }
2230 else
2231 {
2232 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2233 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2234 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2235 }
2236 } /* while more PTEs */
2237 } /* 4KB / 4MB */
2238 }
2239 else
2240 AssertRelease(!PdeDst.n.u1Present);
2241
2242 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2243# ifdef IN_GC
2244 if (VBOX_FAILURE(rc))
2245 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2246# endif
2247 return rc;
2248
2249#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
2250
2251 int rc = VINF_SUCCESS;
2252
2253 /*
2254 * Validate input a little bit.
2255 */
2256# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2257 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2258# else
2259 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2260# endif
2261 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2262 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2263 SHWPDE PdeDst = *pPdeDst;
2264
2265 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2266 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2267
2268 VBOXPDE PdeSrc;
2269 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2270 PdeSrc.n.u1Present = 1;
2271 PdeSrc.n.u1Write = 1;
2272 PdeSrc.n.u1Accessed = 1;
2273 PdeSrc.n.u1User = 1;
2274
2275 /*
2276 * Allocate & map the page table.
2277 */
2278 PSHWPT pPTDst;
2279 PPGMPOOLPAGE pShwPage;
2280 RTGCPHYS GCPhys;
2281
2282 /* Virtual address = physical address */
2283 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK_32;
2284 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2285
2286 if ( rc == VINF_SUCCESS
2287 || rc == VINF_PGM_CACHED_PAGE)
2288 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2289 else
2290 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2291
2292 PdeDst.u &= X86_PDE_AVL_MASK;
2293 PdeDst.u |= pShwPage->Core.Key;
2294 PdeDst.n.u1Present = 1;
2295 *pPdeDst = PdeDst;
2296
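 /* Pre-populate the new table: SyncPage fakes up to PGM_SYNC_NR_PAGES identity-mapped
 * PTEs around GCPtrPage (uErr=0, i.e. the not-present path). */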
2297 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2298 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2299 return rc;
2300
2301#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2302
2303 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2304 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2305 return VERR_INTERNAL_ERROR;
2306#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2307}
2308
2309
2310
2311/**
2312 * Prefetch a page/set of pages.
2313 *
2314 * Typically used to sync commonly used pages before entering raw mode
2315 * after a CR3 reload.
2316 *
2317 * @returns VBox status code.
2318 * @param pVM VM handle.
2319 * @param GCPtrPage Page to prefetch.
2320 */
2321PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2322{
2323#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2324
2325# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2326# error "Invalid shadow mode for 32-bit guest mode!"
2327# endif
2328
2329 /*
2330 * Check that all Guest levels through the PDE are present, getting the
2331 * PD and PDE in the process.
2332 */
2333 int rc = VINF_SUCCESS;
2334# if PGM_WITH_PAGING(PGM_GST_TYPE)
2335 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2336 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2337# else
2338 PVBOXPD pPDSrc = NULL;
2339 const unsigned iPDSrc = 0;
2340# endif
2341
2342# if PGM_WITH_PAGING(PGM_GST_TYPE)
2343 const VBOXPDE PdeSrc = pPDSrc->a[iPDSrc];
2344# else
2345 VBOXPDE PdeSrc;
2346 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2347 PdeSrc.n.u1Present = 1;
2348 PdeSrc.n.u1Write = 1;
2349 PdeSrc.n.u1Accessed = 1;
2350 PdeSrc.n.u1User = 1;
2351# endif
2352
2353# ifdef PGM_SYNC_ACCESSED_BIT
2354 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2355# else
2356 if (PdeSrc.n.u1Present)
2357# endif
2358 {
2359# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2360 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> X86_PD_SHIFT];
2361# else
2362 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> X86_PD_PAE_SHIFT];
2363# endif
2364 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2365 {
2366 if (!PdeDst.n.u1Present)
2367 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2368 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2369 else
2370 {
2371 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2372 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2373 * makes no sense to prefetch more than one page.
2374 */
2375 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2376 if (VBOX_SUCCESS(rc))
2377 rc = VINF_SUCCESS;
2378 }
2379 }
2380 }
2381 return rc;
2382
2383#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2384
2385 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2386 return VERR_INTERNAL_ERROR;
2387#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2388}
2389
2390
2391
2392
2393/**
2394 * Syncs a page during a PGMVerifyAccess() call.
2395 *
2396 * @returns VBox status code (informational included).
2397 * @param GCPtrPage The address of the page to sync.
2398 * @param fPage The effective guest page flags.
2399 * @param uErr The trap error code.
2400 */
2401PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2402{
2403 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2404
2405#if PGM_GST_TYPE == PGM_TYPE_32BIT
2406
2407# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2408# error "Invalid shadow mode for 32-bit guest mode!"
2409# endif
2410
2411#ifndef IN_RING0
2412 if (!(fPage & X86_PTE_US))
2413 {
2414 /*
2415 * Mark this page as safe.
2416 */
2417 /** @todo not correct for pages that contain both code and data!! */
2418 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2419 CSAMMarkPage(pVM, (RTGCPTR)GCPtrPage, true);
2420 }
2421#endif
2422 /*
2423 * Get guest PD and index.
2424 */
2425 unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
2426 PVBOXPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2427 int rc = VINF_SUCCESS;
2428
2429# ifdef PGM_SYNC_DIRTY_BIT
2430 /*
2431 * First check if the page fault was caused by dirty bit tracking
2432 */
2433# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2434 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> X86_PD_SHIFT];
2435# else
2436 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> X86_PD_PAE_SHIFT];
2437# endif
2438 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2439 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2440 Log(("PGMVerifyAccess: success (dirty)\n"));
2441 else
2442# endif /* PGM_SYNC_DIRTY_BIT */
2443 {
2444 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2445 if (uErr & X86_TRAP_PF_US)
2446 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2447 else /* supervisor */
2448 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2449
2450 rc = PGM_BTH_NAME(SyncPage)(pVM, pPDSrc->a[iPDSrc], GCPtrPage, 1, 0);
2451 if (VBOX_SUCCESS(rc))
2452 {
2453 /* Page was successfully synced */
2454 Log(("PGMVerifyAccess: success (sync)\n"));
2455 rc = VINF_SUCCESS;
2456 }
2457 else
2458 {
2459 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2460 return VINF_EM_RAW_GUEST_TRAP;
2461 }
2462 }
2463 return rc;
2464
2465#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
2466 /* Everything is allowed */
2467 return VINF_SUCCESS;
2468
2469#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2470
2471 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2472 return VERR_INTERNAL_ERROR;
2473#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2474}
2475
2476
2477#if PGM_GST_TYPE == PGM_TYPE_32BIT
2478# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2479/**
2480 * Figures out which kind of shadow page this guest PDE warrants.
2481 *
2482 * @returns Shadow page kind.
2483 * @param pPdeSrc The guest PDE in question.
2484 * @param cr4 The current guest cr4 value.
2485 */
2486DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const VBOXPDE *pPdeSrc, uint32_t cr4)
2487{
2488 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2489 return BTH_PGMPOOLKIND_PT_FOR_PT;
2490 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2491 //{
2492 // case 0:
2493 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2494 // case X86_PDE4M_RW:
2495 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2496 // case X86_PDE4M_US:
2497 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2498 // case X86_PDE4M_RW | X86_PDE4M_US:
2499 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2500# if 0
2501 // case X86_PDE4M_PAE_NX:
2502 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2503 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2504 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2505 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2506 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2507 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2508 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2509# endif
2510 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2511 //}
2512}
2513# endif
2514#endif
2515
2516#undef MY_STAM_COUNTER_INC
2517#define MY_STAM_COUNTER_INC(a) do { } while (0)
2518
2519
2520/**
2521 * Syncs the paging hierarchy starting at CR3.
2522 *
2523 * @returns VBox status code, no specials.
2524 * @param pVM The virtual machine.
2525 * @param cr0 Guest context CR0 register
2526 * @param cr3 Guest context CR3 register
2527 * @param cr4 Guest context CR4 register
2528 * @param fGlobal Including global page directories or not
2529 */
2530PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint32_t cr0, uint32_t cr3, uint32_t cr4, bool fGlobal)
2531{
2532#if PGM_GST_TYPE == PGM_TYPE_32BIT
2533# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2534 /*
2535 * Inform the PGM PD Cache Manager about the pending sync.
2536 */
2537 if (fGlobal || VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
2538 {
2539# if 0 /** @todo what the heck is this about? */
2540 /* Don't cause an additional global CR3 reload the next time (the flag is cleared in PGMSyncCR3). */
2541 VM_FF_CLEAR(pVM, VM_FF_PGM_SYNC_CR3);
2542# endif
2543
2544 /* Change this CR3 reload to be a global one. */
2545 fGlobal = true;
2546 }
2547# endif
2548#endif
2549
2550 /*
2551 * Update page access handlers.
2552 * The virtual handlers are always flushed, while the physical handlers are only flushed on demand.
2553 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
2554 * have to look into that later because it will have a bad influence on performance.
2555 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
2556 * bird: Yes, but that won't work for aliases.
2557 */
2558 /** @todo this MUST go away. See #1557. */
2559 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2560 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
2561 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2562
2563#ifdef PGMPOOL_WITH_MONITORING
2564 /*
2565 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2566 * Occasionally we will have to clear all the shadow page tables because we wanted
2567 * to monitor a page which was mapped by too many shadowed page tables. This operation
2568 * is sometimes referred to as a 'lightweight flush'.
2569 */
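 /* E.g. when PGM_SYNC_CLEAR_PGM_POOL was set because a monitored page ended up shadowed
 * by too many page tables, the ring-3 path below clears the whole pool instead of just
 * resetting the modification counters. */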
2570 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2571 pgmPoolMonitorModifiedClearAll(pVM);
2572 else
2573 {
2574# ifdef IN_RING3
2575 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2576 pgmPoolClearAll(pVM);
2577# else
2578 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2579 return VINF_PGM_SYNC_CR3;
2580# endif
2581 }
2582#endif
2583
2584 Assert(fGlobal || (cr4 & X86_CR4_PGE));
2585 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
2586
2587#if PGM_GST_TYPE == PGM_TYPE_32BIT
2588# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2589 /*
2590 * Get page directory addresses.
2591 */
2592# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2593 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
2594# else
2595 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
2596# endif
2597 PVBOXPD pPDSrc = pVM->pgm.s.CTXSUFF(pGuestPD);
2598
2599 Assert(pPDSrc);
2600#ifndef IN_GC
2601 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & X86_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
2602#endif
2603
2604 /*
2605 * Iterate the page directory.
2606 */
2607 PPGMMAPPING pMapping;
2608 unsigned iPdNoMapping;
2609 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
2610 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2611
2612 /* Only check mappings if they are supposed to be put into the shadow page table. */
2613 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
2614 {
2615 pMapping = pVM->pgm.s.CTXSUFF(pMappings);
2616 iPdNoMapping = (pMapping) ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2617 }
2618 else
2619 {
2620 pMapping = 0;
2621 iPdNoMapping = ~0U;
2622 }
2623
2624 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2625 {
2626# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2627 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
2628# else
2629 Assert(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst);
2630# endif
2631 register VBOXPDE PdeSrc = pPDSrc->a[iPD];
2632 if ( PdeSrc.n.u1Present
2633 && (PdeSrc.n.u1User || fRawR0Enabled))
2634 {
2635 /*
2636 * Check for conflicts with GC mappings.
2637 */
2638 if (iPD == iPdNoMapping)
2639 {
2640 if (pVM->pgm.s.fMappingsFixed)
2641 {
2642 /* It's fixed, just skip the mapping. */
2643 const unsigned cPTs = pMapping->cPTs;
2644 iPD += cPTs - 1;
2645 pPDEDst += cPTs + (PGM_SHW_TYPE != PGM_TYPE_32BIT) * cPTs;
2646 pMapping = pMapping->CTXSUFF(pNext);
2647 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2648 continue;
2649 }
2650
2651#ifdef IN_RING3
2652 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD);
2653 if (VBOX_FAILURE(rc))
2654 return rc;
2655
2656 /*
2657 * Update iPdNoMapping and pMapping.
2658 */
2659 pMapping = pVM->pgm.s.pMappingsHC;
2660 while (pMapping && pMapping->GCPtr < (iPD << PGDIR_SHIFT))
2661 pMapping = pMapping->pNextHC;
2662 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2663#else
2664 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2665 return VINF_PGM_SYNC_CR3;
2666#endif
2667 }
2668
2669 /*
2670 * Sync page directory entry.
2671 *
2672 * The current approach is to allocate the page table but to set
2673 * the entry to not-present and postpone the page table syncing till
2674 * it's actually used.
2675 */
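 /* I.e. a shadow PDE left not-present here is filled in lazily by SyncPT() from the
 * #PF handler the first time the guest touches that range. */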
2676# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2677 const unsigned iPdShw = iPD; NOREF(iPdShw);
2678# else
2679 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2680# endif
2681 {
2682 SHWPDE PdeDst = *pPDEDst;
2683 if (PdeDst.n.u1Present)
2684 {
2685 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2686 RTGCPHYS GCPhys;
2687 if ( !PdeSrc.b.u1Size
2688 || !(cr4 & X86_CR4_PSE))
2689 {
2690 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2691# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2692 GCPhys |= i * (PAGE_SIZE / 2);
2693# endif
2694 }
2695 else
2696 {
2697 GCPhys = PdeSrc.u & GST_PDE4M_PG_MASK;
2698# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2699 GCPhys |= i * X86_PAGE_2M_SIZE;
2700# endif
2701 }
2702
2703 if ( pShwPage->GCPhys == GCPhys
2704 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
2705 && ( pShwPage->fCached
2706 || ( !fGlobal
2707 && ( false
2708# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
2709 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2710 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
2711 || ( !pShwPage->fSeenNonGlobal
2712 && (cr4 & X86_CR4_PGE))
2713# endif
2714 )
2715 )
2716 )
2717 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
2718 || ( (cr4 & X86_CR4_PSE)
2719 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
2720 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
2721 )
2722 )
2723 {
2724# ifdef VBOX_WITH_STATISTICS
2725 if ( !fGlobal
2726 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2727 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
2728 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
2729 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
2730 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
2731 else
2732 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
2733# endif /* VBOX_WITH_STATISTICS */
2734/** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
2735 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
2736//# ifdef PGMPOOL_WITH_CACHE
2737// pgmPoolCacheUsed(pPool, pShwPage);
2738//# endif
2739 }
2740 else
2741 {
2742 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
2743 pPDEDst->u = 0;
2744 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
2745 }
2746 }
2747 else
2748 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
2749 pPDEDst++;
2750 }
2751 }
2752 else if (iPD != iPdNoMapping)
2753 {
2754 /*
2755 * Check if there is any page directory to mark not present here.
2756 */
2757# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2758 const unsigned iPdShw = iPD; NOREF(iPdShw);
2759# else
2760 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2761# endif
2762 {
2763 if (pPDEDst->n.u1Present)
2764 {
2765 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
2766 pPDEDst->u = 0;
2767 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
2768 }
2769 pPDEDst++;
2770 }
2771 }
2772 else
2773 {
2774 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2775 const unsigned cPTs = pMapping->cPTs;
2776 if (pVM->pgm.s.fMappingsFixed)
2777 {
2778 /* It's fixed, just skip the mapping. */
2779 pMapping = pMapping->CTXSUFF(pNext);
2780 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2781 }
2782 else
2783 {
2784 /*
2785 * Check for conflicts for subsequent pagetables
2786 * and advance to the next mapping.
2787 */
2788 iPdNoMapping = ~0U;
2789 unsigned iPT = cPTs;
2790 while (iPT-- > 1)
2791 {
2792 if ( pPDSrc->a[iPD + iPT].n.u1Present
2793 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
2794 {
2795# ifdef IN_RING3
2796 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD);
2797 if (VBOX_FAILURE(rc))
2798 return rc;
2799
2800 /*
2801 * Update iPdNoMapping and pMapping.
2802 */
2803 pMapping = pVM->pgm.s.CTXSUFF(pMappings);
2804 while (pMapping && pMapping->GCPtr < (iPD << PGDIR_SHIFT))
2805 pMapping = pMapping->CTXSUFF(pNext);
2806 iPdNoMapping = pMapping ? pMapping->GCPtr >> PGDIR_SHIFT : ~0U;
2807 break;
2808# else
2809 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2810 return VINF_PGM_SYNC_CR3;
2811# endif
2812 }
2813 }
2814 if (iPdNoMapping == ~0U && pMapping)
2815 {
2816 pMapping = pMapping->CTXSUFF(pNext);
2817 if (pMapping)
2818 iPdNoMapping = pMapping->GCPtr >> PGDIR_SHIFT;
2819 }
2820 }
2821 /* advance. */
2822 iPD += cPTs - 1;
2823 pPDEDst += cPTs + (PGM_SHW_TYPE != PGM_TYPE_32BIT) * cPTs;
2824 }
2825
2826 } /* for iPD */
2827# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2828# error "Guest 32-bit mode and shadow AMD64 mode doesn't add up!"
2829# endif
2830
2831 return VINF_SUCCESS;
2832
2833#elif PGM_GST_TYPE == PGM_TYPE_PAE
2834# if PGM_SHW_TYPE == PGM_TYPE_PAE
2835//# error not implemented
2836 return VERR_INTERNAL_ERROR;
2837
2838# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2839# error "Guest PAE mode, but not the shadow mode ; 32bit - maybe, but amd64 no."
2840# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2841
2842#elif PGM_GST_TYPE == PGM_TYPE_AMD64
2843# if PGM_SHW_TYPE == PGM_TYPE_AMD64
2844//# error not implemented
2845 return VERR_INTERNAL_ERROR;
2846
2847# else /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2848# error "Guest AMD64 mode, but not the shadow mode - that can't be right!"
2849# endif /* PGM_SHW_TYPE != PGM_TYPE_AMD64 */
2850
2851#else /* guest real and protected mode */
2852
2853 return VINF_SUCCESS;
2854#endif
2855}
2856
2857
2858
2859
2860#ifdef VBOX_STRICT
2861#ifdef IN_GC
2862# undef AssertMsgFailed
2863# define AssertMsgFailed Log
2864#endif
2865#ifdef IN_RING3
2866# include <VBox/dbgf.h>
2867
2868/**
2869 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
2870 *
2871 * @returns VBox status code (VINF_SUCCESS).
2872 * @param pVM The VM handle.
2873 * @param cr3 The root of the hierarchy.
2874 * @param cr4 The cr4; only PAE and PSE are currently used.
2875 * @param fLongMode Set if long mode, false if not long mode.
2876 * @param cMaxDepth Number of levels to dump.
2877 * @param pHlp Pointer to the output functions.
2878 */
2879__BEGIN_DECLS
2880PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
2881__END_DECLS
2882
2883#endif
2884
2885/**
2886 * Checks that the shadow page table is in sync with the guest one.
2887 *
2888 * @returns The number of errors.
2889 * @param pVM The virtual machine.
2890 * @param cr3 Guest context CR3 register
2891 * @param cr4 Guest context CR4 register
2892 * @param GCPtr Where to start. Defaults to 0.
2893 * @param cb How much to check. Defaults to everything.
2894 */
2895PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint32_t cr3, uint32_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
2896{
2897 unsigned cErrors = 0;
2898
2899#if PGM_GST_TYPE == PGM_TYPE_32BIT
2900
2901# if PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2902# error "Invalid shadow mode for 32-bit guest paging."
2903# endif
2904
2905 PPGM pPGM = &pVM->pgm.s;
2906 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
2907 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
2908 RTHCPHYS HCPhys; /* general usage. */
2909 int rc;
2910
2911 /*
2912 * Check that the Guest CR3 and all its mappings are correct.
2913 */
2914 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & X86_CR3_PAGE_MASK),
2915 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
2916 false);
2917 rc = PGMShwGetPage(pVM, pPGM->pGuestPDGC, NULL, &HCPhysShw);
2918 AssertRCReturn(rc, 1);
2919 rc = PGMRamGCPhys2HCPhys(pPGM, cr3 & X86_CR3_PAGE_MASK, &HCPhys);
2920 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhyswShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
2921# ifndef IN_GC
2922 RTGCPHYS GCPhys;
2923 rc = PGMPhysHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
2924 AssertRCReturn(rc, 1);
2925 AssertMsgReturn((cr3 & X86_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
2926# endif
2927 const X86PD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
2928
2929 /*
2930 * Get and check the Shadow CR3.
2931 */
2932# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2933 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
2934 unsigned cPDEs = ELEMENTS(pPDDst->a);
2935# else
2936 const X86PDPAE *pPDDst = pPGM->CTXMID(ap,PaePDs[0]); /* use it as a 2048 entry PD */
2937 unsigned cPDEs = ELEMENTS(pPDDst->a) * ELEMENTS(pPGM->apHCPaePDs);
2938# endif
2939 if (cb != ~(RTGCUINTPTR)0)
2940 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
2941
2942/** @todo call the other two PGMAssert*() functions. */
2943
2944 /*
2945 * Iterate the shadow page directory.
2946 */
2947 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
2948 unsigned iPDDst = GCPtr >> SHW_PD_SHIFT;
2949 cPDEs += iPDDst;
2950 for (;
2951 iPDDst < cPDEs;
2952 iPDDst++, GCPtr += _4G / cPDEs)
2953 {
2954 const SHWPDE PdeDst = pPDDst->a[iPDDst];
2955 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2956 {
2957 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2958 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
2959 {
2960 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
2961 cErrors++;
2962 continue;
2963 }
2964 }
2965 else if ( (PdeDst.u & X86_PDE_P)
2966 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2967 )
2968 {
2969 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
2970 PPGMPOOLPAGE pPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
2971 if (!pPage)
2972 {
2973 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
2974 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
2975 cErrors++;
2976 continue;
2977 }
2978 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2979
2980 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
2981 {
2982 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
2983 GCPtr, (uint64_t)PdeDst.u));
2984 cErrors++;
2985 }
2986
2987 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
2988 {
2989 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
2990 GCPtr, (uint64_t)PdeDst.u));
2991 cErrors++;
2992 }
2993
2994 const X86PDE PdeSrc = pPDSrc->a[iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)];
2995 if (!PdeSrc.n.u1Present)
2996 {
2997 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
2998 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
2999 cErrors++;
3000 continue;
3001 }
3002
3003 if ( !PdeSrc.b.u1Size
3004 || !(cr4 & X86_CR4_PSE))
3005 {
3006 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3007# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3008 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3009# endif
3010 }
3011 else
3012 {
3013 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3014 {
3015 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3016 GCPtr, (uint64_t)PdeSrc.u));
3017 cErrors++;
3018 continue;
3019 }
3020 GCPhysGst = PdeSrc.u & GST_PDE4M_PG_MASK;
3021# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3022 GCPhysGst |= GCPtr & BIT(X86_PAGE_2M_SHIFT);
3023# endif
3024 }
3025
3026 if ( pPage->enmKind
3027 != (!PdeSrc.b.u1Size || !(cr4 & X86_CR4_PSE) ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3028 {
3029 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3030 pPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3031 cErrors++;
3032 }
3033
3034 rc = PGMRamGCPhys2HCPhysWithFlags(pPGM, GCPhysGst, &HCPhys);
3035 if (VBOX_FAILURE(rc))
3036 {
3037 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3038 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3039 cErrors++;
3040 continue;
3041 }
3042
3043 if (GCPhysGst != pPage->GCPhys)
3044 {
3045 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3046 GCPhysGst, pPage->GCPhys, GCPtr));
3047 cErrors++;
3048 continue;
3049 }
3050
3051 if ( !PdeSrc.b.u1Size
3052 || !(cr4 & X86_CR4_PSE))
3053 {
3054 /*
3055 * Page Table.
3056 */
3057 const GSTPT *pPTSrc;
3058 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3059 if (VBOX_FAILURE(rc))
3060 {
3061 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3062 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3063 cErrors++;
3064 continue;
3065 }
3066 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3067 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3068 {
3069 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3070 // (This problem will go away when/if we shadow multiple CR3s.)
3071 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3072 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3073 cErrors++;
3074 continue;
3075 }
3076 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3077 {
3078 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3079 GCPtr, (uint64_t)PdeDst.u));
3080 cErrors++;
3081 continue;
3082 }
3083
3084 /* iterate the page table. */
3085# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3086 const unsigned offPTSrc = 0;
3087# else
3088 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3089# endif
3090 for (unsigned iPT = 0, off = 0;
3091 iPT < ELEMENTS(pPTDst->a);
3092 iPT++, off += PAGE_SIZE)
3093 {
3094 const SHWPTE PteDst = pPTDst->a[iPT];
3095
3096 /* skip not-present entries. */
3097 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3098 continue;
3099 Assert(PteDst.n.u1Present);
3100
3101 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3102 if (!PteSrc.n.u1Present)
3103 {
3104#ifdef IN_RING3
3105 PGMAssertHandlerAndFlagsInSync(pVM);
3106 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3107#endif
3108 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3109 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3110 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3111 cErrors++;
3112 continue;
3113 }
3114
3115 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3116# if 1 /** @todo sync accessed bit properly... */
3117 fIgnoreFlags |= X86_PTE_A;
3118# endif
3119
3120 /* match the physical addresses */
3121 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3122 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3123
3124# ifdef IN_RING3
3125 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3126 if (VBOX_FAILURE(rc))
3127 {
3128 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3129 {
3130 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3131 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3132 cErrors++;
3133 continue;
3134 }
3135 }
3136 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3137 {
3138 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3139 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3140 cErrors++;
3141 continue;
3142 }
3143# endif
3144
3145 rc = PGMRamGCPhys2HCPhysWithFlags(pPGM, GCPhysGst, &HCPhys);
3146 if (VBOX_FAILURE(rc))
3147 {
3148# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3149 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3150 {
3151 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3152 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3153 cErrors++;
3154 continue;
3155 }
3156# endif
3157 if (PteDst.n.u1Write)
3158 {
3159 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3160 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3161 cErrors++;
3162 }
3163 fIgnoreFlags |= X86_PTE_RW;
3164 }
3165 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3166 {
3167 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3168 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3169 cErrors++;
3170 continue;
3171 }
3172
3173 /* flags */
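                 /*
                  * Pages covered by an active access handler must be write protected
                  * (write handlers) or not present at all (all-access handlers) in
                  * the shadow page table; the corresponding PTE bit is then excluded
                  * from the flags comparison below.
                  */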
3174 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
3175 {
3176 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
3177 {
3178 if (PteDst.n.u1Write)
3179 {
3180                         AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3181 GCPtr + off, HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3182 cErrors++;
3183 continue;
3184 }
3185 fIgnoreFlags |= X86_PTE_RW;
3186 }
3187 else
3188 {
3189 if (PteDst.n.u1Present)
3190 {
3191 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3192 GCPtr + off, HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3193 cErrors++;
3194 continue;
3195 }
3196 fIgnoreFlags |= X86_PTE_P;
3197 }
3198 }
3199 else
3200 {
3201 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3202 {
3203 if (PteDst.n.u1Write)
3204 {
3205 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3206 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3207 cErrors++;
3208 continue;
3209 }
3210 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3211 {
3212 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3213 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3214 cErrors++;
3215 continue;
3216 }
3217 if (PteDst.n.u1Dirty)
3218 {
3219 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3220 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3221 cErrors++;
3222 }
3223# if 0 /** @todo sync access bit properly... */
3224 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3225 {
3226                         AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3227 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3228 cErrors++;
3229 }
3230 fIgnoreFlags |= X86_PTE_RW;
3231# else
3232 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3233# endif
3234 }
3235 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3236 {
3237                     /* accessed bit emulation (not implemented). */
3238 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3239 {
3240 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3241 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3242 cErrors++;
3243 continue;
3244 }
3245 if (!PteDst.n.u1Accessed)
3246 {
3247                         AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3248 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3249 cErrors++;
3250 }
3251 fIgnoreFlags |= X86_PTE_P;
3252 }
3253# ifdef DEBUG_sandervl
3254 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3255# endif
3256 }
3257
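                 /*
                  * Compare the remaining flag bits. The second test also ignores the
                  * RW bit on the guest side so that a lazily deregistered physical
                  * write handler (shadow PTE still read-only) does not raise a false
                  * mismatch.
                  */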
3258 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3259 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3260 )
3261 {
3262 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3263 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3264 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3265 cErrors++;
3266 continue;
3267 }
3268 } /* foreach PTE */
3269 }
3270 else
3271 {
3272 /*
3273 * Big Page.
3274 */
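             /*
              * A guest 2/4 MB page is shadowed by an ordinary 4 KB page table, so the
              * PDE flags are checked here and each shadow PTE is then checked against
              * the guest PDE below.
              */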
3275 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | X86_PDE_PAE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3276 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3277 {
3278 if (PdeDst.n.u1Write)
3279 {
3280 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3281 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3282 cErrors++;
3283 continue;
3284 }
3285 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3286 {
3287                     AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3288 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3289 cErrors++;
3290 continue;
3291 }
3292# if 0 /** @todo sync access bit properly... */
3293 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3294 {
3295                     AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3296 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3297 cErrors++;
3298 }
3299 fIgnoreFlags |= X86_PTE_RW;
3300# else
3301 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3302# endif
3303 }
3304 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3305 {
3306                 /* accessed bit emulation (not implemented). */
3307 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3308 {
3309 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3310 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3311 cErrors++;
3312 continue;
3313 }
3314 if (!PdeDst.n.u1Accessed)
3315 {
3316                     AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3317 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3318 cErrors++;
3319 }
3320 fIgnoreFlags |= X86_PTE_P;
3321 }
3322
3323 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3324 {
3325 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3326 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3327 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3328 cErrors++;
3329 }
3330
3331 /* iterate the page table. */
3332 for (unsigned iPT = 0, off = 0;
3333 iPT < ELEMENTS(pPTDst->a);
3334 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3335 {
3336 const SHWPTE PteDst = pPTDst->a[iPT];
3337
3338 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3339 {
3340 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3341 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3342 cErrors++;
3343 }
3344
3345 /* skip not-present entries. */
3346 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3347 continue;
3348
3349 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3350
3351 /* match the physical addresses */
3352 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3353
3354# ifdef IN_RING3
3355 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3356 if (VBOX_FAILURE(rc))
3357 {
3358 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3359 {
3360 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3361 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3362 cErrors++;
3363 }
3364 }
3365 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3366 {
3367 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3368 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3369 cErrors++;
3370 continue;
3371 }
3372# endif
3373
3374 rc = PGMRamGCPhys2HCPhysWithFlags(pPGM, GCPhysGst, &HCPhys);
3375 if (VBOX_FAILURE(rc))
3376 {
3377# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3378 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3379 {
3380 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3381 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3382 cErrors++;
3383 continue;
3384 }
3385# endif
3386 if (PteDst.n.u1Write)
3387 {
3388 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3389 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3390 cErrors++;
3391 }
3392 fIgnoreFlags |= X86_PTE_RW;
3393 }
3394 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3395 {
3396 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3397 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3398 cErrors++;
3399 continue;
3400 }
3401
3402 /* flags */
3403 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_ALL | MM_RAM_FLAGS_VIRTUAL_ALL | MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
3404 {
3405 if (HCPhys & (MM_RAM_FLAGS_PHYSICAL_WRITE | MM_RAM_FLAGS_VIRTUAL_WRITE))
3406 {
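                              /* A write handler that is temporarily disabled
                                 (MM_RAM_FLAGS_PHYSICAL_TEMP_OFF) may leave the page
                                 writable, so the write protection is only checked
                                 for active handlers. */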
3407 if (!(HCPhys & MM_RAM_FLAGS_PHYSICAL_TEMP_OFF))
3408 {
3409 if (PteDst.n.u1Write)
3410 {
3411                                 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3412 GCPtr + off, HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3413 cErrors++;
3414 continue;
3415 }
3416 fIgnoreFlags |= X86_PTE_RW;
3417 }
3418 }
3419 else
3420 {
3421 if (PteDst.n.u1Present)
3422 {
3423                             AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3424 GCPtr + off, HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3425 cErrors++;
3426 continue;
3427 }
3428 fIgnoreFlags |= X86_PTE_P;
3429 }
3430 }
3431
3432 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3433 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
3434 )
3435 {
3436 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
3437 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3438 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3439 cErrors++;
3440 continue;
3441 }
3442 } /* foreach PTE */
3443 }
3444 }
3445 /* not present */
3446
3447     } /* foreach PDE */
3448
3449# ifdef DEBUG
3450 if (cErrors)
3451 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
3452# endif
3453
3454#elif PGM_GST_TYPE == PGM_TYPE_PAE
3455//# error not implemented
3456
3457
3458#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3459//# error not implemented
3460
3461/*#else: guest real and protected mode */
3462#endif
3463 return cErrors;
3464}
3465#endif /* VBOX_STRICT */
3466