VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 8659

Last change on this file since 8659 was 8659, checked in by vboxsync, 17 years ago

Updates for 64 bits paging.
Removed conditional dirty and accessed bits syncing. Doesn't make sense not to do this.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 162.7 KB
1/* $Id: PGMAllBth.h 8659 2008-05-07 14:39:41Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42__END_DECLS
43
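/*
 * A note on the template mechanism assumed throughout this file: the includer
 * defines PGM_GST_TYPE/PGM_SHW_TYPE and the PGM_BTH_NAME/PGM_BTH_DECL macros,
 * so each inclusion produces one specialized copy of every function below.
 * A minimal sketch of the assumed shape (the real definitions live in the
 * includer and also encode the HC/GC context):
 *
 *     #define PGM_BTH_NAME(name)        pgmBthPaePae##name       // one suffix per guest/shadow mode pair
 *     #define PGM_BTH_DECL(type, name)  type PGM_BTH_NAME(name)
 */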
44
45/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
46#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE
47# error "Invalid combination; PAE guest implies PAE shadow"
48#endif
49
50#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
51 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE)
52# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
53#endif
54
55#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
56 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE)
57# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64) \
61 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64)
62# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
63#endif
64
65#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
66# define PGM_WITHOUT_MAPPINGS
67#endif
68
69/**
70 * #PF Handler for raw-mode guest execution.
71 *
72 * @returns VBox status code (appropriate for trap handling and GC return).
73 * @param pVM VM Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 */
78PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
79{
80#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
81
82# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
83 /*
84 * Hide the instruction fetch trap indicator for now.
85 */
86 /** @todo NXE will change this and we must fix NXE in the switcher too! */
87 if (uErr & X86_TRAP_PF_ID)
88 {
89 uErr &= ~X86_TRAP_PF_ID;
90 TRPMSetErrorCode(pVM, uErr);
91 }
92# endif
93
94 /*
95 * Get PDs.
96 */
97 int rc;
98# if PGM_WITH_PAGING(PGM_GST_TYPE)
99# if PGM_GST_TYPE == PGM_TYPE_32BIT
100 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
101 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
102
103# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
104
105# if PGM_GST_TYPE == PGM_TYPE_PAE
106 unsigned iPDSrc;
107 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, (RTGCUINTPTR)pvFault, &iPDSrc);
108
109# elif PGM_GST_TYPE == PGM_TYPE_AMD64
110 unsigned iPDSrc;
111 PX86PML4E pPml4e;
112 X86PDPE Pdpe;
113 PGSTPD pPDSrc;
114
115 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4e, &Pdpe, &iPDSrc);
116 Assert(pPml4e);
117# endif
118 /* Quick check for a valid guest trap. */
119 if (!pPDSrc)
120 {
121 LogFlow(("Trap0eHandler: guest PDPTR not present CR3=%VGp\n", (CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK)));
122 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eGuestTrap; });
123 TRPMSetErrorCode(pVM, uErr);
124 return VINF_EM_RAW_GUEST_TRAP;
125 }
126# endif
127# else
128 PGSTPD pPDSrc = NULL;
129 const unsigned iPDSrc = 0;
130# endif
131
132# if PGM_SHW_TYPE == PGM_TYPE_32BIT
133 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
134 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
135# elif PGM_SHW_TYPE == PGM_TYPE_PAE
136 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
137 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries, so no need to AND with SHW_PD_MASK to get iPDDst */
138
139# if PGM_GST_TYPE == PGM_TYPE_PAE
140 /* Did we mark the PDPT as not present in SyncCR3? */
141 unsigned iPDPTE = ((RTGCUINTPTR)pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
142 if (!pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present)
143 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present = 1;
144
145# endif
146
147# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
148 const unsigned iPDDst = (((RTGCUINTPTR)pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
149 PX86PDPAE pPDDst;
150
151 rc = PGMShwGetLongModePDPtr(pVM, (RTGCUINTPTR)pvFault, &pPDDst);
152 if (rc != VINF_SUCCESS)
153 {
154 AssertMsg(rc == VINF_PGM_SYNC_CR3, ("Unexpected rc=%Vrc\n", rc));
155 return rc;
156 }
157 Assert(pPDDst);
158# endif
159
160# if PGM_WITH_PAGING(PGM_GST_TYPE)
161 /*
162 * If we successfully correct the write protection fault due to dirty bit
163 * tracking, or this page fault is a genuine one, then return immediately.
164 */
165 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
166 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
167 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
168 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
169 || rc == VINF_EM_RAW_GUEST_TRAP)
170 {
171 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
172 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
173 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
174 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
175 }
176
177 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
178# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
179
180 /*
181 * A common case is the not-present error caused by lazy page table syncing.
182 *
183 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
184 * so we can safely assume that the shadow PT is present when calling SyncPage later.
185 *
186 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
187 * of mapping conflict and defer to SyncCR3 in R3.
188 * (Again, we do NOT support access handlers for non-present guest pages.)
189 *
190 */
191# if PGM_WITH_PAGING(PGM_GST_TYPE)
192 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
193# else
194 GSTPDE PdeSrc;
195 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
196 PdeSrc.n.u1Present = 1;
197 PdeSrc.n.u1Write = 1;
198 PdeSrc.n.u1Accessed = 1;
199 PdeSrc.n.u1User = 1;
200# endif
201 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
202 && !pPDDst->a[iPDDst].n.u1Present
203 && PdeSrc.n.u1Present
204 )
205
206 {
207 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
208 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
209 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
210 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
211 if (VBOX_SUCCESS(rc))
212 {
213 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
214 return rc;
215 }
216 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
217 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
218 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
219 return VINF_PGM_SYNC_CR3;
220 }
221
222# if PGM_WITH_PAGING(PGM_GST_TYPE)
223 /*
224 * Check if this address is within any of our mappings.
225 *
226 * This is *very* fast and it's gonna save us a bit of effort below and prevent
227 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
228 * (BTW, it's impossible to have physical access handlers in a mapping.)
229 */
230 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
231 {
232 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
233 PPGMMAPPING pMapping = CTXALLSUFF(pVM->pgm.s.pMappings);
234 for ( ; pMapping; pMapping = CTXALLSUFF(pMapping->pNext))
235 {
236 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
237 break;
238 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
239 {
240 /*
241 * The first thing we check is if we've got an undetected conflict.
242 */
243 if (!pVM->pgm.s.fMappingsFixed)
244 {
245 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
246 while (iPT-- > 0)
247 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
248 {
249 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
250 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
251 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
252 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
253 return VINF_PGM_SYNC_CR3;
254 }
255 }
256
257 /*
258 * Check if the fault address is in a virtual page access handler range.
259 */
260 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->HyperVirtHandlers, pvFault);
261 if ( pCur
262 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
263 && uErr & X86_TRAP_PF_RW)
264 {
265# ifdef IN_GC
266 STAM_PROFILE_START(&pCur->Stat, h);
267 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
268 STAM_PROFILE_STOP(&pCur->Stat, h);
269# else
270 AssertFailed();
271 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
272# endif
273 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
274 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
275 return rc;
276 }
277
278 /*
279 * Pretend we're not here and let the guest handle the trap.
280 */
281 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
282 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
283 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
284 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
285 return VINF_EM_RAW_GUEST_TRAP;
286 }
287 }
288 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
289 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
290# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
291
292 /*
293 * Check if this fault address is flagged for special treatment,
294 * which means we'll have to figure out the physical address and
295 * check flags associated with it.
296 *
297 * ASSUME that we can limit any special access handling to pages
298 * in page tables which the guest believes to be present.
299 */
300 if (PdeSrc.n.u1Present)
301 {
302 RTGCPHYS GCPhys = NIL_RTGCPHYS;
303
304# if PGM_WITH_PAGING(PGM_GST_TYPE)
305 uint32_t cr4 = CPUMGetGuestCR4(pVM);
306 if ( PdeSrc.b.u1Size
307 && (cr4 & X86_CR4_PSE))
308 GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK)
309 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
310 else
311 {
312 PGSTPT pPTSrc;
313 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
314 if (VBOX_SUCCESS(rc))
315 {
316 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
317 if (pPTSrc->a[iPTESrc].n.u1Present)
318 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
319 }
320 }
321# else
322 /* No paging so the fault address is the physical address */
323 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
324# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
325
326 /*
327 * If we have a GC address we'll check if it has any flags set.
328 */
329 if (GCPhys != NIL_RTGCPHYS)
330 {
331 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
332
333 PPGMPAGE pPage;
334 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
335 if (VBOX_SUCCESS(rc))
336 {
337 if (PGM_PAGE_HAS_ANY_HANDLERS(pPage))
338 {
339 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
340 {
341 /*
342 * Physical page access handler.
343 */
344 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
345 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
346 if (pCur)
347 {
348# ifdef PGM_SYNC_N_PAGES
349 /*
350 * If the region is write protected and we got a page not present fault, then sync
351 * the pages. If the fault was caused by a read, then restart the instruction.
352 * In case of write access continue to the GC write handler.
353 *
354 * ASSUMES that there is only one handler per page or that they have similar write properties.
355 */
356 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
357 && !(uErr & X86_TRAP_PF_P))
358 {
359 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
360 if ( VBOX_FAILURE(rc)
361 || !(uErr & X86_TRAP_PF_RW)
362 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
363 {
364 AssertRC(rc);
365 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
366 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
367 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
368 return rc;
369 }
370 }
371# endif
372
373 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
374 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
375 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
376
377#if defined(IN_GC) || defined(IN_RING0)
378 if (CTXALLSUFF(pCur->pfnHandler))
379 {
380 STAM_PROFILE_START(&pCur->Stat, h);
381 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
382 STAM_PROFILE_STOP(&pCur->Stat, h);
383 }
384 else
385#endif
386 rc = VINF_EM_RAW_EMULATE_INSTR;
387 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
388 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
389 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
390 return rc;
391 }
392 }
393# if PGM_WITH_PAGING(PGM_GST_TYPE)
394 else
395 {
396# ifdef PGM_SYNC_N_PAGES
397 /*
398 * If the region is write protected and we got a page not present fault, then sync
399 * the pages. If the fault was caused by a read, then restart the instruction.
400 * In case of write access continue to the GC write handler.
401 */
402 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
403 && !(uErr & X86_TRAP_PF_P))
404 {
405 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
406 if ( VBOX_FAILURE(rc)
407 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
408 || !(uErr & X86_TRAP_PF_RW))
409 {
410 AssertRC(rc);
411 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
412 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
413 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
414 return rc;
415 }
416 }
417# endif
418 /*
419 * Ok, it's a virtual page access handler.
420 *
421 * Since it's faster to search by address, we'll do that first
422 * and then retry by GCPhys if that fails.
423 */
424 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
425 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
426 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
427 */
428 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
429 if (pCur)
430 {
431 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
432 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
433 || !(uErr & X86_TRAP_PF_P)
434 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
435 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
436
437 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
438 && ( uErr & X86_TRAP_PF_RW
439 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
440 {
441# ifdef IN_GC
442 STAM_PROFILE_START(&pCur->Stat, h);
443 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
444 STAM_PROFILE_STOP(&pCur->Stat, h);
445# else
446 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
447# endif
448 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
449 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
450 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
451 return rc;
452 }
453 /* Unhandled part of a monitored page */
454 }
455 else
456 {
457 /* Check by physical address. */
458 PPGMVIRTHANDLER pCur;
459 unsigned iPage;
460 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
461 &pCur, &iPage);
462 Assert(VBOX_SUCCESS(rc) || !pCur);
463 if ( pCur
464 && ( uErr & X86_TRAP_PF_RW
465 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
466 {
467 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
468# ifdef IN_GC
469 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
470 Assert(off < pCur->cb);
471 STAM_PROFILE_START(&pCur->Stat, h);
472 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
473 STAM_PROFILE_STOP(&pCur->Stat, h);
474# else
475 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
476# endif
477 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
478 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
479 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
480 return rc;
481 }
482 }
483 }
484# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
485
486 /*
487 * There is a handled area of the page, but this fault doesn't belong to it.
488 * We must emulate the instruction.
489 *
490 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
491 * we first check if this was a page-not-present fault for a page with only
492 * write access handlers. Restart the instruction if it wasn't a write access.
493 */
494 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
495
496 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
497 && !(uErr & X86_TRAP_PF_P))
498 {
499 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
500 if ( VBOX_FAILURE(rc)
501 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
502 || !(uErr & X86_TRAP_PF_RW))
503 {
504 AssertRC(rc);
505 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
506 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
507 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
508 return rc;
509 }
510 }
511
512 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
513 * It's writing to an unhandled part of the LDT page several million times.
514 */
515 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
516 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%RHp%s%s\n",
517 rc, pPage->HCPhys,
518 PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage) ? " phys" : "",
519 PGM_PAGE_HAS_ANY_VIRTUAL_HANDLERS(pPage) ? " virt" : ""));
520 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
521 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
522 return rc;
523 } /* if any kind of handler */
524
525# if PGM_WITH_PAGING(PGM_GST_TYPE)
526 if (uErr & X86_TRAP_PF_P)
527 {
528 /*
529 * The page isn't marked, but it might still be monitored by a virtual page access handler.
530 * (ASSUMES no temporary disabling of virtual handlers.)
531 */
532 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
533 * we should correct both the shadow page table and physical memory flags, and not only check for
534 * accesses within the handler region but for access to pages with virtual handlers. */
535 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
536 if (pCur)
537 {
538 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
539 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
540 || !(uErr & X86_TRAP_PF_P)
541 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
542 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
543
544 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
545 && ( uErr & X86_TRAP_PF_RW
546 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
547 {
548# ifdef IN_GC
549 STAM_PROFILE_START(&pCur->Stat, h);
550 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
551 STAM_PROFILE_STOP(&pCur->Stat, h);
552# else
553 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
554# endif
555 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
556 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
557 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
558 return rc;
559 }
560 }
561 }
562# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
563 }
564 else
565 {
566 /* When the guest accesses invalid physical memory (e.g. probing of RAM or accessing a remapped MMIO range), we'll fall
567 * back to the recompiler to emulate the instruction.
568 */
569 LogFlow(("pgmPhysGetPageEx %VGp failed with %Vrc\n", GCPhys, rc));
570 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersInvalid);
571 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
572 return VINF_EM_RAW_EMULATE_INSTR;
573 }
574
575 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
576
577# ifdef PGM_OUT_OF_SYNC_IN_GC
578 /*
579 * We are here only if the page is present in the guest page tables and the trap is not handled
580 * by our handlers.
581 * Check it for page out-of-sync situation.
582 */
583 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
584
585 if (!(uErr & X86_TRAP_PF_P))
586 {
587 /*
588 * Page is not present in our page tables.
589 * Try to sync it!
590 * BTW, fPageShw is invalid in this branch!
591 */
592 if (uErr & X86_TRAP_PF_US)
593 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
594 else /* supervisor */
595 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
596
597# if defined(LOG_ENABLED) && !defined(IN_RING0)
598 RTGCPHYS GCPhys;
599 uint64_t fPageGst;
600 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
601 Log(("Page out of sync: %p eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
602 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)));
603# endif /* LOG_ENABLED */
604
605# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
606 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
607 {
608 uint64_t fPageGst;
609 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
610 if ( VBOX_SUCCESS(rc)
611 && !(fPageGst & X86_PTE_US))
612 {
613 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
614 if ( pvFault == (RTGCPTR)pRegFrame->eip
615 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
616# ifdef CSAM_DETECT_NEW_CODE_PAGES
617 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
618 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)) /* any new code we encounter here */
619# endif /* CSAM_DETECT_NEW_CODE_PAGES */
620 )
621 {
622 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
623 rc = CSAMExecFault(pVM, (RTGCPTR)pRegFrame->eip);
624 if (rc != VINF_SUCCESS)
625 {
626 /*
627 * CSAM needs to perform a job in ring 3.
628 *
629 * Sync the page before going to the host context; otherwise we'll end up in a loop if
630 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
631 */
632 LogFlow(("CSAM ring 3 job\n"));
633 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
634 AssertRC(rc2);
635
636 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
637 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
638 return rc;
639 }
640 }
641# ifdef CSAM_DETECT_NEW_CODE_PAGES
642 else
643 if ( uErr == X86_TRAP_PF_RW
644 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
645 && pRegFrame->ecx < 0x10000
646 )
647 {
648 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
649 * to detect loading of new code pages.
650 */
651
652 /*
653 * Decode the instruction.
654 */
655 RTGCPTR PC;
656 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
657 if (rc == VINF_SUCCESS)
658 {
659 DISCPUSTATE Cpu;
660 uint32_t cbOp;
661 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
662
663 /* For now we'll restrict this to rep movsw/d instructions */
664 if ( rc == VINF_SUCCESS
665 && Cpu.pCurInstr->opcode == OP_MOVSWD
666 && (Cpu.prefix & PREFIX_REP))
667 {
668 CSAMMarkPossibleCodePage(pVM, pvFault);
669 }
670 }
671 }
672# endif /* CSAM_DETECT_NEW_CODE_PAGES */
673
674 /*
675 * Mark this page as safe.
676 */
677 /** @todo not correct for pages that contain both code and data!! */
678 Log2(("CSAMMarkPage %p; scanned=%d\n", pvFault, true));
679 CSAMMarkPage(pVM, pvFault, true);
680 }
681 }
682# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0) */
683 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
684 if (VBOX_SUCCESS(rc))
685 {
686 /* The page was successfully synced, return to the guest. */
687 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
688 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
689 return VINF_SUCCESS;
690 }
691 }
692 else
693 {
694 /*
695 * A side effect of not flushing global PDEs is out-of-sync pages due
696 * to physically monitored regions that are no longer valid.
697 * Assume for now that it only applies to the read/write flag.
698 */
699 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
700 {
701 if (uErr & X86_TRAP_PF_US)
702 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
703 else /* supervisor */
704 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
705
706
707 /*
708 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
709 */
710 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
711 if (VBOX_SUCCESS(rc))
712 {
713 /*
714 * Page was successfully synced, return to guest.
715 */
716# ifdef VBOX_STRICT
717 RTGCPHYS GCPhys;
718 uint64_t fPageGst;
719 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
720 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
721 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
722
723 uint64_t fPageShw;
724 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
725 Assert(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW);
726# endif /* VBOX_STRICT */
727 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
728 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
729 return VINF_SUCCESS;
730 }
731
732 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
733 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
734 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP|X86_CR0_PG)) == X86_CR0_PG)
735 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
736 {
737 uint64_t fPageGst;
738 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
739 if ( VBOX_SUCCESS(rc)
740 && !(fPageGst & X86_PTE_RW))
741 {
742 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
743 if (VBOX_SUCCESS(rc))
744 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulGC);
745 else
746 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulR3);
747 return rc;
748 }
749 else
750 AssertMsgFailed(("Unexpected r/w page %x flag=%x\n", pvFault, (uint32_t)fPageGst));
751 }
752
753 }
754
755# if PGM_WITH_PAGING(PGM_GST_TYPE)
756# ifdef VBOX_STRICT
757 /*
758 * Check for VMM page flags vs. Guest page flags consistency.
759 * Currently only for debug purposes.
760 */
761 if (VBOX_SUCCESS(rc))
762 {
763 /* Get guest page flags. */
764 uint64_t fPageGst;
765 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
766 if (VBOX_SUCCESS(rc))
767 {
768 uint64_t fPageShw;
769 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
770
771 /*
772 * Compare page flags.
773 * Note: we have AVL, A, D bits desynched.
774 */
775 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
776 ("Page flags mismatch! pvFault=%p GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
777 }
778 else
779 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
780 }
781 else
782 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
783# endif /* VBOX_STRICT */
784# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
785 }
786 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
787# endif /* PGM_OUT_OF_SYNC_IN_GC */
788 }
789 else
790 {
791 /*
792 * Page not present in Guest OS or invalid page table address.
793 * This is potential virtual page access handler food.
794 *
795 * For the present we'll say that our access handlers don't
796 * work for this case - we've already discarded the page table
797 * not present case which is identical to this.
798 *
799 * When we perchance find we need this, we will probably have AVL
800 * trees (offset based) to operate on and we can measure their speed
801 * against mapping a page table and probably rearrange this handling
802 * a bit. (Like, searching virtual ranges before checking the
803 * physical address.)
804 */
805 }
806 }
807
808
809# if PGM_WITH_PAGING(PGM_GST_TYPE)
810 /*
811 * Conclusion, this is a guest trap.
812 */
813 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
814 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
815 return VINF_EM_RAW_GUEST_TRAP;
816# else
817 /* present, but not a monitored page; perhaps the guest is probing physical memory */
818 return VINF_EM_RAW_EMULATE_INSTR;
819# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
820
821
822#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
823
824 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
825 return VERR_INTERNAL_ERROR;
826#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
827}
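/*
 * For reference, a minimal sketch of how the uErr bits tested throughout the
 * handler above decode, assuming the standard x86 #PF error code layout (the
 * X86_TRAP_PF_* names are the ones already used in this file):
 *
 *     bool fNotPresent = !(uErr & X86_TRAP_PF_P);    // fault on a not-present page (vs. protection violation)
 *     bool fWrite      = !!(uErr & X86_TRAP_PF_RW);  // caused by a write access
 *     bool fUserMode   = !!(uErr & X86_TRAP_PF_US);  // CPU was in user mode (CPL=3)
 *     bool fInstrFetch = !!(uErr & X86_TRAP_PF_ID);  // instruction fetch (NX-capable CPUs)
 */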
828
829
830/**
831 * Emulation of the invlpg instruction.
832 *
833 *
834 * @returns VBox status code.
835 *
836 * @param pVM VM handle.
837 * @param GCPtrPage Page to invalidate.
838 *
839 * @remark ASSUMES that the guest is updating before invalidating. This order
840 * isn't required by the CPU, so this is speculative and could cause
841 * trouble.
842 *
843 * @todo Flush page or page directory only if necessary!
844 * @todo Add a #define for simply invalidating the page.
845 */
846PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
847{
848#if PGM_GST_TYPE == PGM_TYPE_32BIT \
849 || PGM_GST_TYPE == PGM_TYPE_PAE \
850 || PGM_GST_TYPE == PGM_TYPE_AMD64
851 int rc;
852
853 LogFlow(("InvalidatePage %x\n", GCPtrPage));
854 /*
855 * Get the shadow PD entry and skip out if this PD isn't present.
856 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
857 */
858 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
859# if PGM_SHW_TYPE == PGM_TYPE_32BIT
860 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
861# elif PGM_SHW_TYPE == PGM_TYPE_PAE
862 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
863# else /* AMD64 */
864 /* PML4 */
865 const unsigned iPml4 = ((RTGCUINTPTR64)GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
866 PX86PML4E pPml4eDst = &CTXMID(pVM->pgm.s.p,PaePML4)->a[iPml4];
867 if (!pPml4eDst->n.u1Present)
868 {
869 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
870 return VINF_SUCCESS;
871 }
872
873 /* PDPT */
874 PX86PDPT pPDPT;
875 rc = PGM_HCPHYS_2_PTR(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, &pPDPT);
876 if (VBOX_FAILURE(rc))
877 return rc;
878 const unsigned iPDPT = ((RTGCUINTPTR64)GCPtrPage >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
879 PX86PDPE pPdpeDst = &pPDPT->a[iPDPT];
880 if (!pPdpeDst->n.u1Present)
881 {
882 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
883 return VINF_SUCCESS;
884 }
885
886 /* PD */
887 PX86PDPAE pPd;
888 rc = PGM_HCPHYS_2_PTR(pVM, pPdpeDst->u & X86_PDPE_PG_MASK, &pPd);
889 if (VBOX_FAILURE(rc))
890 return rc;
891 const unsigned iPd = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
892 PX86PDEPAE pPdeDst = &pPd->a[iPd];
893# endif
894
895 const SHWPDE PdeDst = *pPdeDst;
896 if (!PdeDst.n.u1Present)
897 {
898 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
899 return VINF_SUCCESS;
900 }
901
902 /*
903 * Get the guest PD entry and calc big page.
904 */
905# if PGM_GST_TYPE == PGM_TYPE_32BIT
906 PX86PD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
907 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
908 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
909# else
910 unsigned iPDSrc;
911# if PGM_GST_TYPE == PGM_TYPE_PAE
912 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
913# else /* AMD64 */
914 PX86PML4E pPml4eSrc;
915 X86PDPE PdpeSrc;
916 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
917# endif
918 GSTPDE PdeSrc;
919
920 if (pPDSrc)
921 PdeSrc = pPDSrc->a[iPDSrc];
922 else
923 PdeSrc.u = 0;
924# endif
925
926 const uint32_t cr4 = CPUMGetGuestCR4(pVM);
927 const bool fIsBigPage = PdeSrc.b.u1Size && (cr4 & X86_CR4_PSE);
928
929# ifdef IN_RING3
930 /*
931 * If a CR3 Sync is pending we may ignore the invalidate page operation
932 * depending on the kind of sync and if it's a global page or not.
933 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
934 */
935# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
936 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
937 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
938 && fIsBigPage
939 && PdeSrc.b.u1Global
940 && (cr4 & X86_CR4_PGE)
941 )
942 )
943# else
944 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
945# endif
946 {
947 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
948 return VINF_SUCCESS;
949 }
950# endif /* IN_RING3 */
951
952
953# if PGM_GST_TYPE == PGM_TYPE_AMD64
954 Assert(pPml4eDst->n.u1Present && pPml4eDst->u & SHW_PDPT_MASK);
955 if (pPml4eSrc->n.u1Present)
956 {
957 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
958 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
959 {
960 /*
961 * Mark not present so we can resync the PML4E when it's used.
962 */
963 LogFlow(("InvalidatePage: Out-of-sync PML4E at %VGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
964 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
965 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
966 pPml4eDst->u = 0;
967 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
968 PGM_INVL_GUEST_TLBS();
969 }
970 else if (!pPml4eSrc->n.u1Accessed)
971 {
972 /*
973 * Mark not present so we can set the accessed bit.
974 */
975 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
976 pPml4eDst->u = 0;
977 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
978 PGM_INVL_GUEST_TLBS();
979 }
980 }
981 else
982 {
983 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
984 pPml4eDst->u = 0;
985 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
986 PGM_INVL_PG(GCPtrPage);
987 return VINF_SUCCESS;
988 }
989
990 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
991 if (PdpeSrc.n.u1Present)
992 {
993 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
994 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
995 {
996 /*
997 * Mark not present so we can resync the PDPE when it's used.
998 */
999 LogFlow(("InvalidatePage: Out-of-sync PDPE at %VGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1000 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1001 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PML4, iPml4);
1002 pPdpeDst->u = 0;
1003 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1004 PGM_INVL_GUEST_TLBS();
1005 }
1006 else if (!PdpeSrc.lm.u1Accessed)
1007 {
1008 /*
1009 * Mark not present so we can set the accessed bit.
1010 */
1011 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PML4, iPml4);
1012 pPdpeDst->u = 0;
1013 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1014 PGM_INVL_GUEST_TLBS();
1015 }
1016 }
1017 else
1018 {
1019 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PDPT, iPDDst);
1020 pPdpeDst->u = 0;
1021 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1022 PGM_INVL_PG(GCPtrPage);
1023 return VINF_SUCCESS;
1024 }
1025# endif
1026
1027 /*
1028 * Deal with the Guest PDE.
1029 */
1030 rc = VINF_SUCCESS;
1031 if (PdeSrc.n.u1Present)
1032 {
1033 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1034 {
1035 /*
1036 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1037 */
1038 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1039 Assert(PGMGetGuestMode(pVM) <= PGMMODE_32_BIT);
1040 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1041 }
1042 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1043 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1044 {
1045 /*
1046 * Mark not present so we can resync the PDE when it's used.
1047 */
1048 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1049 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1050 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1051 pPdeDst->u = 0;
1052 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1053 PGM_INVL_GUEST_TLBS();
1054 }
1055 else if (!PdeSrc.n.u1Accessed)
1056 {
1057 /*
1058 * Mark not present so we can set the accessed bit.
1059 */
1060 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1061 pPdeDst->u = 0;
1062 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1063 PGM_INVL_GUEST_TLBS();
1064 }
1065 else if (!fIsBigPage)
1066 {
1067 /*
1068 * 4KB - page.
1069 */
1070 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1071 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1072# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1073 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1074 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1075# endif
1076 if (pShwPage->GCPhys == GCPhys)
1077 {
1078# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1079 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1080 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1081 if (pPT->a[iPTEDst].n.u1Present)
1082 {
1083# ifdef PGMPOOL_WITH_USER_TRACKING
1084 /* This is very unlikely with caching/monitoring enabled. */
1085 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1086# endif
1087 pPT->a[iPTEDst].u = 0;
1088 }
1089# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1090 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1091 if (VBOX_SUCCESS(rc))
1092 rc = VINF_SUCCESS;
1093# endif
1094 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
1095 PGM_INVL_PG(GCPtrPage);
1096 }
1097 else
1098 {
1099 /*
1100 * The page table address changed.
1101 */
1102 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
1103 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1104 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1105 pPdeDst->u = 0;
1106 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1107 PGM_INVL_GUEST_TLBS();
1108 }
1109 }
1110 else
1111 {
1112 /*
1113 * 2/4MB - page.
1114 */
1115 /* Before freeing the page, check if anything really changed. */
1116 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1117 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1118# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1119 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1120 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1121# endif
1122 if ( pShwPage->GCPhys == GCPhys
1123 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1124 {
1125 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1126 /** @todo PAT */
1127 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1128 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1129 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1130 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1131 {
1132 LogFlow(("Skipping flush for big page containing %VGv (PD=%X .u=%VX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1133 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
1134 return VINF_SUCCESS;
1135 }
1136 }
1137
1138 /*
1139 * Ok, the page table is present and it's been changed in the guest.
1140 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1141 * We could do this for some flushes in GC too, but we need an algorithm for
1142 * deciding which 4MB pages contain code likely to be executed very soon.
1143 */
1144 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1145 pPdeDst->u = 0;
1146 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
1147 PGM_INVL_BIG_PG(GCPtrPage);
1148 }
1149 }
1150 else
1151 {
1152 /*
1153 * Page directory is not present, mark shadow PDE not present.
1154 */
1155 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1156 {
1157 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1158 pPdeDst->u = 0;
1159 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1160 PGM_INVL_PG(GCPtrPage);
1161 }
1162 else
1163 {
1164 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1165 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
1166 }
1167 }
1168
1169 return rc;
1170
1171#else /* guest real and protected mode */
1172 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1173 return VINF_SUCCESS;
1174#endif
1175}
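/*
 * For context, a sketch of the guest-side ordering the @remark above assumes
 * (update the PTE first, then invalidate), using the usual GCC inline-assembly
 * idiom; the names here are illustrative only:
 *
 *     pGuestPT->a[iPte].u = uNewPte;                                     // guest updates its page table entry
 *     __asm__ __volatile__("invlpg (%0)" : : "r" (pvPage) : "memory");   // then flushes that TLB entry
 */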
1176
1177
1178#ifdef PGMPOOL_WITH_USER_TRACKING
1179/**
1180 * Update the tracking of shadowed pages.
1181 *
1182 * @param pVM The VM handle.
1183 * @param pShwPage The shadow page.
1184 * @param HCPhys The physical page that is being dereferenced.
1185 */
1186DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1187{
1188# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1189 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1190 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1191
1192 /** @todo If this turns out to be a bottleneck (*very* likely), two things can be done:
1193 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1194 * 2. write protect all shadowed pages. I.e. implement caching.
1195 */
1196 /*
1197 * Find the guest address.
1198 */
1199 for (PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
1200 pRam;
1201 pRam = CTXALLSUFF(pRam->pNext))
1202 {
1203 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1204 while (iPage-- > 0)
1205 {
1206 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1207 {
1208 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1209 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1210 pShwPage->cPresent--;
1211 pPool->cPresent--;
1212 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1213 return;
1214 }
1215 }
1216 }
1217
1218 for (;;)
1219 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1220# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1221 pShwPage->cPresent--;
1222 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1223# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1224}
1225
1226
1227/**
1228 * Update the tracking of shadowed pages.
1229 *
1230 * @param pVM The VM handle.
1231 * @param pShwPage The shadow page.
1232 * @param u16 The top 16 bits of pPage->HCPhys.
1233 * @param pPage Pointer to the guest page. This will be modified.
1234 * @param iPTDst The index into the shadow table.
1235 */
1236DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1237{
1238# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1239 /*
1240 * We're making certain assumptions about the placement of cRef and idx.
1241 */
1242 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1243 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1244
1245 /*
1246 * Just deal with the simple first time here.
1247 */
1248 if (!u16)
1249 {
1250 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1251 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1252 }
1253 else
1254 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1255
1256 /* write back, trying to be clever... */
1257 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%VHp->%VHp iPTDst=%#x\n",
1258 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1259 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1260# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1261
1262 /* update statistics. */
1263 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1264 pShwPage->cPresent++;
1265 if (pShwPage->iFirstPresent > iPTDst)
1266 pShwPage->iFirstPresent = iPTDst;
1267}
1268#endif /* PGMPOOL_WITH_USER_TRACKING */
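/*
 * A small sketch of the reference packing the two tracking helpers above rely
 * on, assuming the layout implied by the assertions (idx in the low bits of
 * the top 16 bits of HCPhys, cRefs above it); purely illustrative:
 *
 *     uint16_t u16   = *((uint16_t *)&pPage->HCPhys + 3);                                           // top 16 bits of HCPhys
 *     uint16_t idx   = u16 & ((1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) - 1);      // pool page index
 *     uint16_t cRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);                  // reference count (or extent marker)
 */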
1269
1270
1271/**
1272 * Creates a 4K shadow page for a guest page.
1273 *
1274 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1275 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1276 * will be mapped in this function.
1277 *
1278 * @param pVM VM handle.
1279 * @param pPteDst Destination page table entry.
1280 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1281 * Can safely assume that only the flags are being used.
1282 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1283 * @param pShwPage Pointer to the shadow page.
1284 * @param iPTDst The index into the shadow table.
1285 *
1286 * @remark Not used for 2/4MB pages!
1287 */
1288DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1289{
1290 if (PteSrc.n.u1Present)
1291 {
1292 /*
1293 * Find the ram range.
1294 */
1295 PPGMPAGE pPage;
1296 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1297 if (VBOX_SUCCESS(rc))
1298 {
1299 /** @todo investigate PWT, PCD and PAT. */
1300 /*
1301 * Make page table entry.
1302 */
1303 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1304 SHWPTE PteDst;
1305 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1306 {
1307 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1308 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1309 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1310 | (HCPhys & X86_PTE_PAE_PG_MASK);
1311 else
1312 {
1313 LogFlow(("SyncPageWorker: monitored page (%VGp) -> mark not present\n", HCPhys));
1314 PteDst.u = 0;
1315 }
1316 /** @todo count these two kinds. */
1317 }
1318 else
1319 {
1320 /*
1321 * If the page or page directory entry is not marked accessed,
1322 * we mark the page not present.
1323 */
1324 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1325 {
1326 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1327 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1328 PteDst.u = 0;
1329 }
1330 else
1331 /*
1332 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1333 * when the page is modified.
1334 */
1335 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1336 {
1337 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1338 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1339 | (HCPhys & X86_PTE_PAE_PG_MASK)
1340 | PGM_PTFLAGS_TRACK_DIRTY;
1341 }
1342 else
1343 {
1344 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1345 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1346 | (HCPhys & X86_PTE_PAE_PG_MASK);
1347 }
1348 }
1349
1350#ifdef PGMPOOL_WITH_USER_TRACKING
1351 /*
1352 * Keep user track up to date.
1353 */
1354 if (PteDst.n.u1Present)
1355 {
1356 if (!pPteDst->n.u1Present)
1357 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1358 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1359 {
1360 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1361 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1362 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1363 }
1364 }
1365 else if (pPteDst->n.u1Present)
1366 {
1367 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1368 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1369 }
1370#endif /* PGMPOOL_WITH_USER_TRACKING */
1371
1372 /*
1373 * Update statistics and commit the entry.
1374 */
1375 if (!PteSrc.n.u1Global)
1376 pShwPage->fSeenNonGlobal = true;
1377 *pPteDst = PteDst;
1378 }
1379 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1380 /** @todo count these. */
1381 }
1382 else
1383 {
1384 /*
1385 * Page not-present.
1386 */
1387 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1388#ifdef PGMPOOL_WITH_USER_TRACKING
1389 /* Keep user track up to date. */
1390 if (pPteDst->n.u1Present)
1391 {
1392 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1393 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1394 }
1395#endif /* PGMPOOL_WITH_USER_TRACKING */
1396 pPteDst->u = 0;
1397 /** @todo count these. */
1398 }
1399}
1400
1401
1402/**
1403 * Syncs a guest OS page.
1404 *
1405 * There are no conflicts at this point, nor is there any need for
1406 * page table allocations.
1407 *
1408 * @returns VBox status code.
1409 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1410 * @param pVM VM handle.
1411 * @param PdeSrc Page directory entry of the guest.
1412 * @param GCPtrPage Guest context page address.
1413 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1414 * @param uErr Fault error (X86_TRAP_PF_*).
1415 */
1416PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1417{
1418 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1419
1420#if PGM_GST_TYPE == PGM_TYPE_32BIT \
1421 || PGM_GST_TYPE == PGM_TYPE_PAE
1422
1423# if PGM_WITH_NX(PGM_GST_TYPE)
1424 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1425# endif
1426
1427 /*
1428 * Assert preconditions.
1429 */
1430 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1431 Assert(PdeSrc.n.u1Present);
1432 Assert(cPages);
1433
1434 /*
1435 * Get the shadow PDE, find the shadow page table in the pool.
1436 */
1437 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1438# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1439 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1440# else /* PAE */
1441 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1442# endif
1443 Assert(PdeDst.n.u1Present);
1444 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1445
1446 /*
1447 * Check that the page is present and that the shadow PDE isn't out of sync.
1448 */
1449 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1450 RTGCPHYS GCPhys;
1451 if (!fBigPage)
1452 {
1453 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1454# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1455 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1456 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1457# endif
1458 }
1459 else
1460 {
1461 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1462# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1463 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1464 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1465# endif
1466 }
1467 if ( pShwPage->GCPhys == GCPhys
1468 && PdeSrc.n.u1Present
1469 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1470 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1471# if PGM_WITH_NX(PGM_GST_TYPE)
1472 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1473# endif
1474 )
1475 {
1476 /*
1477 * Check that the PDE is marked accessed already.
1478 * Since we set the accessed bit *before* getting here on a #PF, this
1479 * check is only meant for dealing with non-#PF'ing paths.
1480 */
1481 if (PdeSrc.n.u1Accessed)
1482 {
1483 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1484 if (!fBigPage)
1485 {
1486 /*
1487 * 4KB Page - Map the guest page table.
1488 */
1489 PGSTPT pPTSrc;
1490 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1491 if (VBOX_SUCCESS(rc))
1492 {
1493# ifdef PGM_SYNC_N_PAGES
1494 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1495 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1496 {
1497 /*
1498 * This code path is currently only taken when the caller is PGMTrap0eHandler
1499 * for non-present pages!
1500 *
1501 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1502 * deal with locality.
1503 */
1504 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1505# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1506 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1507 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1508# else
1509 const unsigned offPTSrc = 0;
1510# endif
1511 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1512 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1513 iPTDst = 0;
1514 else
1515 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1516 for (; iPTDst < iPTDstEnd; iPTDst++)
1517 {
1518 if (!pPTDst->a[iPTDst].n.u1Present)
1519 {
1520 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1521 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1522 NOREF(GCPtrCurPage);
1523#ifndef IN_RING0
1524 /*
1525                          * Assume kernel code is marked as supervisor - not as user level code
1526                          * executed using a conforming code selector - and is mapped read-only.
1527                          * Also assume that a page we're monitoring is of no interest to CSAM.
1528 */
1529 PPGMPAGE pPage;
1530 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1531 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1532 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)GCPtrCurPage)
1533 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1534 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1535 )
1536#endif /* else: CSAM not active */
1537 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1538 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1539 GCPtrCurPage, PteSrc.n.u1Present,
1540 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1541 PteSrc.n.u1User & PdeSrc.n.u1User,
1542 (uint64_t)PteSrc.u,
1543 (uint64_t)pPTDst->a[iPTDst].u,
1544 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1545 }
1546 }
1547 }
1548 else
1549# endif /* PGM_SYNC_N_PAGES */
1550 {
1551 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1552 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1553 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1554 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1555 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1556 GCPtrPage, PteSrc.n.u1Present,
1557 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1558 PteSrc.n.u1User & PdeSrc.n.u1User,
1559 (uint64_t)PteSrc.u,
1560 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1561 }
1562 }
1563 else /* MMIO or invalid page: emulated in #PF handler. */
1564 {
1565 LogFlow(("PGM_GCPHYS_2_PTR %VGp failed with %Vrc\n", GCPhys, rc));
1566 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1567 }
1568 }
1569 else
1570 {
1571 /*
1572                  * 2/4 MB page - lazily sync the shadow 4KB pages.
1573                  * (There are many ways of getting here; it's no longer only CSAM.)
1574 */
1575 /* Calculate the GC physical address of this 4KB shadow page. */
1576 RTGCPHYS GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1577 /* Find ram range. */
1578 PPGMPAGE pPage;
1579 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1580 if (VBOX_SUCCESS(rc))
1581 {
1582 /*
1583 * Make shadow PTE entry.
1584 */
1585 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1586 SHWPTE PteDst;
1587 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1588 | (HCPhys & X86_PTE_PAE_PG_MASK);
1589 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1590 {
1591 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1592 PteDst.n.u1Write = 0;
1593 else
1594 PteDst.u = 0;
1595 }
1596 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1597# ifdef PGMPOOL_WITH_USER_TRACKING
1598 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1599 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1600# endif
1601 pPTDst->a[iPTDst] = PteDst;
1602
1603
1604 /*
1605 * If the page is not flagged as dirty and is writable, then make it read-only
1606 * at PD level, so we can set the dirty bit when the page is modified.
1607 *
1608 * ASSUMES that page access handlers are implemented on page table entry level.
1609 * Thus we will first catch the dirty access and set PDE.D and restart. If
1610 * there is an access handler, we'll trap again and let it work on the problem.
1611 */
1612 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1613 * As for invlpg, it simply frees the whole shadow PT.
1614 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1615 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1616 {
1617 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1618 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1619 PdeDst.n.u1Write = 0;
1620 }
1621 else
1622 {
1623 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1624 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1625 }
1626# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1627 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1628# else /* PAE */
1629 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1630# endif
1631 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1632 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1633 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1634 }
1635 else
1636                     LogFlow(("pgmPhysGetPageEx %VGp (big) failed with %Vrc\n", GCPhys, rc));
1637 }
1638 return VINF_SUCCESS;
1639 }
1640 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1641 }
1642 else
1643 {
1644 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1645 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1646 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1647 }
1648
1649 /*
1650 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1651 * Yea, I'm lazy.
1652 */
1653 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1654# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1655 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1656# else /* PAE */
1657 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1658# endif
1659 PGM_INVL_GUEST_TLBS();
1660 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1661
1662#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
1663
1664# ifdef PGM_SYNC_N_PAGES
1665 /*
1666 * Get the shadow PDE, find the shadow page table in the pool.
1667 */
1668 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1669# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1670 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1671# else /* PAE */
1672 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1673# endif
1674 Assert(PdeDst.n.u1Present);
1675 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1676 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1677
1678# if PGM_SHW_TYPE == PGM_TYPE_PAE
1679 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1680 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1681# else
1682 const unsigned offPTSrc = 0;
1683# endif
1684
1685 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1686 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1687 {
1688 /*
1689 * This code path is currently only taken when the caller is PGMTrap0eHandler
1690 * for non-present pages!
1691 *
1692         * We sync PGM_SYNC_NR_PAGES pages around the faulting page to take
1693         * advantage of locality.
1694 */
1695 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1696 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1697 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1698 iPTDst = 0;
1699 else
1700 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1701 for (; iPTDst < iPTDstEnd; iPTDst++)
1702 {
1703 if (!pPTDst->a[iPTDst].n.u1Present)
1704 {
1705 GSTPTE PteSrc;
1706
1707 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1708
1709 /* Fake the page table entry */
1710 PteSrc.u = GCPtrCurPage;
1711 PteSrc.n.u1Present = 1;
1712 PteSrc.n.u1Dirty = 1;
1713 PteSrc.n.u1Accessed = 1;
1714 PteSrc.n.u1Write = 1;
1715 PteSrc.n.u1User = 1;
1716
1717 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1718
1719 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1720 GCPtrCurPage, PteSrc.n.u1Present,
1721 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1722 PteSrc.n.u1User & PdeSrc.n.u1User,
1723 (uint64_t)PteSrc.u,
1724 (uint64_t)pPTDst->a[iPTDst].u,
1725 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1726 }
1727 }
1728 }
1729 else
1730# endif /* PGM_SYNC_N_PAGES */
1731 {
1732 GSTPTE PteSrc;
1733 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1734 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1735
1736 /* Fake the page table entry */
1737 PteSrc.u = GCPtrCurPage;
1738 PteSrc.n.u1Present = 1;
1739 PteSrc.n.u1Dirty = 1;
1740 PteSrc.n.u1Accessed = 1;
1741 PteSrc.n.u1Write = 1;
1742 PteSrc.n.u1User = 1;
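        /*
         * Without guest paging (real or protected mode) the guest address space is treated as
         * identity mapped, so the faked PTE simply uses the virtual page address as the physical
         * address and grants present/write/user access with the A and D bits already set.
         */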
1743 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1744
1745 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1746 GCPtrPage, PteSrc.n.u1Present,
1747 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1748 PteSrc.n.u1User & PdeSrc.n.u1User,
1749 (uint64_t)PteSrc.u,
1750 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1751 }
1752 return VINF_SUCCESS;
1753
1754#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1755    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1756 return VERR_INTERNAL_ERROR;
1757#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1758}
1759
1760
1761
1762#if PGM_WITH_PAGING(PGM_GST_TYPE)
1763
1764/**
1765 * Investigate page fault and handle write protection page faults caused by
1766 * dirty bit tracking.
1767 *
1768 * @returns VBox status code.
1769 * @param pVM VM handle.
1770 * @param uErr Page fault error code.
1771 * @param pPdeDst Shadow page directory entry.
1772 * @param pPdeSrc Guest page directory entry.
1773 * @param GCPtrPage Guest context page address.
1774 */
1775PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1776{
1777 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
1778 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
1779 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
1780 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1781# if PGM_WITH_NX(PGM_GST_TYPE)
1782 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1783# endif
1784 unsigned uPageFaultLevel;
1785 int rc;
1786
1787 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1788 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1789
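    /*
     * Rough outline of what follows: walk the guest structures (PML4E/PDPE/PDE/PTE as applicable)
     * for the faulting address.  A genuine guest violation ends up returning VINF_EM_RAW_GUEST_TRAP,
     * a write fault that only tripped our dirty-bit write protection fixes up the shadow entry and
     * returns VINF_PGM_HANDLED_DIRTY_BIT_FAULT, while most other cases return
     * VINF_PGM_NO_DIRTY_BIT_TRACKING so the caller resolves the fault.
     */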
1790# if PGM_GST_TYPE == PGM_TYPE_PAE \
1791 || PGM_GST_TYPE == PGM_TYPE_AMD64
1792
1793# if PGM_GST_TYPE == PGM_TYPE_AMD64
1794 PX86PML4E pPml4eSrc;
1795 PX86PDPE pPdpeSrc;
1796
1797 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
1798 Assert(pPml4eSrc);
1799
1800 /*
1801 * Real page fault? (PML4E level)
1802 */
1803 if ( (uErr & X86_TRAP_PF_RSVD)
1804 || !pPml4eSrc->n.u1Present
1805 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
1806 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1807 || (fUserLevelFault && !pPml4eSrc->n.u1User)
1808 )
1809 {
1810 uPageFaultLevel = 0;
1811 goto UpperLevelPageFault;
1812 }
1813 Assert(pPdpeSrc);
1814
1815# else /* PAE */
1816 PX86PDPE pPdpeSrc = &pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtrPage >> GST_PDPT_SHIFT) & GST_PDPT_MASK];
1817# endif
1818
1819 /*
1820 * Real page fault? (PDPE level)
1821 */
1822 if ( (uErr & X86_TRAP_PF_RSVD)
1823 || !pPdpeSrc->n.u1Present
1824# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
1825 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
1826 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
1827 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
1828# endif
1829 )
1830 {
1831 uPageFaultLevel = 1;
1832 goto UpperLevelPageFault;
1833 }
1834# endif
1835
1836 /*
1837 * Real page fault? (PDE level)
1838 */
1839 if ( (uErr & X86_TRAP_PF_RSVD)
1840 || !pPdeSrc->n.u1Present
1841# if PGM_WITH_NX(PGM_GST_TYPE)
1842 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
1843# endif
1844 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1845 || (fUserLevelFault && !pPdeSrc->n.u1User) )
1846 {
1847 uPageFaultLevel = 2;
1848 goto UpperLevelPageFault;
1849 }
1850
1851 /*
1852 * First check the easy case where the page directory has been marked read-only to track
1853 * the dirty bit of an emulated BIG page
1854 */
1855 if (pPdeSrc->b.u1Size && fBigPagesSupported)
1856 {
1857 /* Mark guest page directory as accessed */
1858# if PGM_GST_TYPE == PGM_TYPE_AMD64
1859 pPml4eSrc->n.u1Accessed = 1;
1860 pPdpeSrc->lm.u1Accessed = 1;
1861# endif
1862 pPdeSrc->b.u1Accessed = 1;
1863
1864 /*
1865 * Only write protection page faults are relevant here.
1866 */
1867 if (fWriteFault)
1868 {
1869 /* Mark guest page directory as dirty (BIG page only). */
1870 pPdeSrc->b.u1Dirty = 1;
1871
1872 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1873 {
1874 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1875
1876 Assert(pPdeSrc->b.u1Write);
1877
1878 pPdeDst->n.u1Write = 1;
1879 pPdeDst->n.u1Accessed = 1;
1880 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1881 PGM_INVL_BIG_PG(GCPtrPage);
1882 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1883 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1884 }
1885 }
1886 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1887 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1888 }
1889 /* else: 4KB page table */
1890
1891 /*
1892 * Map the guest page table.
1893 */
1894 PGSTPT pPTSrc;
1895 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
1896 if (VBOX_SUCCESS(rc))
1897 {
1898 /*
1899 * Real page fault?
1900 */
1901 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
1902 const GSTPTE PteSrc = *pPteSrc;
1903 if ( !PteSrc.n.u1Present
1904# if PGM_WITH_NX(PGM_GST_TYPE)
1905 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
1906# endif
1907 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
1908 || (fUserLevelFault && !PteSrc.n.u1User)
1909 )
1910 {
1911# ifdef IN_GC
1912 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1913# endif
1914 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1915 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
1916
1917 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1918 * See the 2nd case above as well.
1919 */
1920 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
1921 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1922
1924 return VINF_EM_RAW_GUEST_TRAP;
1925 }
1926 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
1927
1928 /*
1929 * Set the accessed bits in the page directory and the page table.
1930 */
1931# if PGM_GST_TYPE == PGM_TYPE_AMD64
1932 pPml4eSrc->n.u1Accessed = 1;
1933 pPdpeSrc->lm.u1Accessed = 1;
1934# endif
1935 pPdeSrc->n.u1Accessed = 1;
1936 pPteSrc->n.u1Accessed = 1;
1937
1938 /*
1939 * Only write protection page faults are relevant here.
1940 */
1941 if (fWriteFault)
1942 {
1943 /* Write access, so mark guest entry as dirty. */
1944# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
1945 if (!pPteSrc->n.u1Dirty)
1946 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
1947 else
1948 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
1949# endif
1950
1951 pPteSrc->n.u1Dirty = 1;
1952
1953 if (pPdeDst->n.u1Present)
1954 {
1955 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
1956 * Our individual shadow handlers will provide more information and force a fatal exit.
1957 */
1958 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
1959 {
1960 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
1961 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1962 return VINF_SUCCESS;
1963 }
1964
1965 /*
1966 * Map shadow page table.
1967 */
1968 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1969 if (pShwPage)
1970 {
1971 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1972 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1973 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
1974 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
1975 {
1976 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
1977# ifdef VBOX_STRICT
1978 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
1979 if (pPage)
1980 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
1981 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
1982# endif
1983 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1984
1985 Assert(pPteSrc->n.u1Write);
1986
1987 pPteDst->n.u1Write = 1;
1988 pPteDst->n.u1Dirty = 1;
1989 pPteDst->n.u1Accessed = 1;
1990 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
1991 PGM_INVL_PG(GCPtrPage);
1992
1993 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1994 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1995 }
1996 }
1997 else
1998 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
1999 }
2000 }
2001/** @todo Optimize accessed bit emulation? */
2002# ifdef VBOX_STRICT
2003 /*
2004 * Sanity check.
2005 */
2006 else if ( !pPteSrc->n.u1Dirty
2007 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2008 && pPdeDst->n.u1Present)
2009 {
2010 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2011 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2012 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2013 if ( pPteDst->n.u1Present
2014 && pPteDst->n.u1Write)
2015 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2016 }
2017# endif /* VBOX_STRICT */
2018 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2019 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2020 }
2021 AssertRC(rc);
2022 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2023 return rc;
2024
2025
2026UpperLevelPageFault:
2027    /* Page fault detected while checking the PML4E, PDPE or PDE.
2028 * Single exit handler to get rid of duplicate code paths.
2029 */
2030# ifdef IN_GC
2031 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
2032# endif
2033 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
2034 LogFlow(("CheckPageFault: real page fault at %VGv (%d)\n", GCPtrPage, uPageFaultLevel));
2035
2036 if (
2037# if PGM_GST_TYPE == PGM_TYPE_AMD64
2038 pPml4eSrc->n.u1Present &&
2039# endif
2040# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2041 pPdpeSrc->n.u1Present &&
2042# endif
2043 pPdeSrc->n.u1Present)
2044 {
2045 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2046 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2047 {
2048 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2049 }
2050 else
2051 {
2052 /*
2053 * Map the guest page table.
2054 */
2055 PGSTPT pPTSrc;
2056 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2057 if (VBOX_SUCCESS(rc))
2058 {
2059 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2060 const GSTPTE PteSrc = *pPteSrc;
2061 if (pPteSrc->n.u1Present)
2062 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2063 }
2064 AssertRC(rc);
2065 }
2066 }
2067 return VINF_EM_RAW_GUEST_TRAP;
2068}
2069
2070#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2071
2072
2073/**
2074 * Sync a shadow page table.
2075 *
2076 * The shadow page table is not present. This includes the case where
2077 * there is a conflict with a mapping.
2078 *
2079 * @returns VBox status code.
2080 * @param pVM VM handle.
2081 * @param   iPDSrc      Page directory index.
2082 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2083 * Assume this is a temporary mapping.
2084 * @param GCPtrPage GC Pointer of the page that caused the fault
2085 */
2086PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage)
2087{
2088 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2089 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
2090 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
2091
2092#if PGM_GST_TYPE == PGM_TYPE_32BIT \
2093 || PGM_GST_TYPE == PGM_TYPE_PAE
2094
2095 /*
2096 * Validate input a little bit.
2097 */
2098 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%VGv\n", iPDSrc, GCPtrPage));
2099# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2100 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2101# else
2102 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2103# endif
2104 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2105 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2106 SHWPDE PdeDst = *pPdeDst;
2107
2108# ifndef PGM_WITHOUT_MAPPINGS
2109 /*
2110 * Check for conflicts.
2111 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2112 * HC: Simply resolve the conflict.
2113 */
2114 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2115 {
2116 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2117# ifndef IN_RING3
2118 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
2119 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2120 return VERR_ADDRESS_CONFLICT;
2121# else
2122 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2123 Assert(pMapping);
2124# if PGM_GST_TYPE == PGM_TYPE_32BIT
2125 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2126# elif PGM_GST_TYPE == PGM_TYPE_PAE
2127 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2128# endif
2129 if (VBOX_FAILURE(rc))
2130 {
2131 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2132 return rc;
2133 }
2134 PdeDst = *pPdeDst;
2135# endif
2136 }
2137# else /* PGM_WITHOUT_MAPPINGS */
2138 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2139# endif /* PGM_WITHOUT_MAPPINGS */
2140 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2141
2142 /*
2143 * Sync page directory entry.
2144 */
2145 int rc = VINF_SUCCESS;
2146 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2147 if (PdeSrc.n.u1Present)
2148 {
2149 /*
2150 * Allocate & map the page table.
2151 */
2152 PSHWPT pPTDst;
2153 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2154 PPGMPOOLPAGE pShwPage;
2155 RTGCPHYS GCPhys;
2156 if (fPageTable)
2157 {
2158 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2159# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2160 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2161 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2162# endif
2163 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2164 }
2165 else
2166 {
2167 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2168# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2169 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2170 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2171# endif
2172 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2173 }
2174 if (rc == VINF_SUCCESS)
2175 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2176 else if (rc == VINF_PGM_CACHED_PAGE)
2177 {
2178 /*
2179 * The PT was cached, just hook it up.
2180 */
2181 if (fPageTable)
2182 PdeDst.u = pShwPage->Core.Key
2183 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2184 else
2185 {
2186 PdeDst.u = pShwPage->Core.Key
2187 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2188 /* (see explanation and assumptions further down.) */
2189 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2190 {
2191 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2192 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2193 PdeDst.b.u1Write = 0;
2194 }
2195 }
2196 *pPdeDst = PdeDst;
2197 return VINF_SUCCESS;
2198 }
2199 else if (rc == VERR_PGM_POOL_FLUSHED)
2200 return VINF_PGM_SYNC_CR3;
2201 else
2202 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2203 PdeDst.u &= X86_PDE_AVL_MASK;
2204 PdeDst.u |= pShwPage->Core.Key;
2205
2206 /*
2207 * Page directory has been accessed (this is a fault situation, remember).
2208 */
2209 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2210 if (fPageTable)
2211 {
2212 /*
2213 * Page table - 4KB.
2214 *
2215 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2216 */
2217 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2218 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2219 PGSTPT pPTSrc;
2220 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2221 if (VBOX_SUCCESS(rc))
2222 {
2223 /*
2224 * Start by syncing the page directory entry so CSAM's TLB trick works.
2225 */
2226 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2227 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2228 *pPdeDst = PdeDst;
2229
2230 /*
2231 * Directory/page user or supervisor privilege: (same goes for read/write)
2232 *
2233                  *      Directory    Page    Combined
2234                  *        U/S        U/S       U/S
2235                  *         0          0         0
2236                  *         0          1         0
2237                  *         1          0         0
2238                  *         1          1         1
2239 *
2240 * Simple AND operation. Table listed for completeness.
2241 *
2242 */
2243 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
2244# ifdef PGM_SYNC_N_PAGES
2245 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2246 unsigned iPTDst = iPTBase;
2247 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
2248 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2249 iPTDst = 0;
2250 else
2251 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2252# else /* !PGM_SYNC_N_PAGES */
2253 unsigned iPTDst = 0;
2254 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2255# endif /* !PGM_SYNC_N_PAGES */
2256# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2257 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2258 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2259# else
2260 const unsigned offPTSrc = 0;
2261# endif
2262 for (; iPTDst < iPTDstEnd; iPTDst++)
2263 {
2264 const unsigned iPTSrc = iPTDst + offPTSrc;
2265 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2266
2267 if (PteSrc.n.u1Present) /* we've already cleared it above */
2268 {
2269# ifndef IN_RING0
2270 /*
2271                     * Assume kernel code is marked as supervisor - not as user level code
2272                     * executed using a conforming code selector - and is mapped read-only.
2273                     * Also assume that a page we're monitoring is of no interest to CSAM.
2274 */
2275 PPGMPAGE pPage;
2276 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2277 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2278 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2279 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2280 )
2281# endif
2282 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2283 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2284 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2285 PteSrc.n.u1Present,
2286 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2287 PteSrc.n.u1User & PdeSrc.n.u1User,
2288 (uint64_t)PteSrc.u,
2289 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2290 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2291 }
2292 } /* for PTEs */
2293 }
2294 }
2295 else
2296 {
2297 /*
2298 * Big page - 2/4MB.
2299 *
2300 * We'll walk the ram range list in parallel and optimize lookups.
2301              * We will only sync one shadow page table at a time.
2302 */
2303 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2304
2305 /**
2306             * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4KB page tables).
2307 */
2308
2309 /*
2310 * Start by syncing the page directory entry.
2311 */
2312 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2313 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2314
2315 /*
2316 * If the page is not flagged as dirty and is writable, then make it read-only
2317 * at PD level, so we can set the dirty bit when the page is modified.
2318 *
2319 * ASSUMES that page access handlers are implemented on page table entry level.
2320 * Thus we will first catch the dirty access and set PDE.D and restart. If
2321 * there is an access handler, we'll trap again and let it work on the problem.
2322 */
2323 /** @todo move the above stuff to a section in the PGM documentation. */
2324 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2325 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2326 {
2327 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2328 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2329 PdeDst.b.u1Write = 0;
2330 }
2331 *pPdeDst = PdeDst;
2332
2333 /*
2334 * Fill the shadow page table.
2335 */
2336 /* Get address and flags from the source PDE. */
2337 SHWPTE PteDstBase;
2338 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2339
2340 /* Loop thru the entries in the shadow PT. */
2341 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2342 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2343 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2344 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2345 PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
2346 unsigned iPTDst = 0;
2347 while (iPTDst < ELEMENTS(pPTDst->a))
2348 {
2349 /* Advance ram range list. */
2350 while (pRam && GCPhys > pRam->GCPhysLast)
2351 pRam = CTXALLSUFF(pRam->pNext);
2352 if (pRam && GCPhys >= pRam->GCPhys)
2353 {
2354 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2355 do
2356 {
2357 /* Make shadow PTE. */
2358 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2359 SHWPTE PteDst;
2360
2361 /* Make sure the RAM has already been allocated. */
2362 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2363 {
2364 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2365 {
2366# ifdef IN_RING3
2367 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2368# else
2369 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2370# endif
2371 if (rc != VINF_SUCCESS)
2372 return rc;
2373 }
2374 }
2375
2376 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2377 {
2378 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2379 {
2380 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2381 PteDst.n.u1Write = 0;
2382 }
2383 else
2384 PteDst.u = 0;
2385 }
2386# ifndef IN_RING0
2387 /*
2388 * Assuming kernel code will be marked as supervisor and not as user level and executed
2389 * using a conforming code selector. Don't check for readonly, as that implies the whole
2390 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2391 */
2392 else if ( !PdeSrc.n.u1User
2393 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2394 PteDst.u = 0;
2395# endif
2396 else
2397 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2398# ifdef PGMPOOL_WITH_USER_TRACKING
2399 if (PteDst.n.u1Present)
2400 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2401# endif
2402 /* commit it */
2403 pPTDst->a[iPTDst] = PteDst;
2404 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2405 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2406 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2407
2408 /* advance */
2409 GCPhys += PAGE_SIZE;
2410 iHCPage++;
2411 iPTDst++;
2412 } while ( iPTDst < ELEMENTS(pPTDst->a)
2413 && GCPhys <= pRam->GCPhysLast);
2414 }
2415 else if (pRam)
2416 {
2417 Log(("Invalid pages at %VGp\n", GCPhys));
2418 do
2419 {
2420 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2421 GCPhys += PAGE_SIZE;
2422 iPTDst++;
2423 } while ( iPTDst < ELEMENTS(pPTDst->a)
2424 && GCPhys < pRam->GCPhys);
2425 }
2426 else
2427 {
2428 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2429 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2430 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2431 }
2432 } /* while more PTEs */
2433 } /* 4KB / 4MB */
2434 }
2435 else
2436 AssertRelease(!PdeDst.n.u1Present);
2437
2438 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2439# ifdef IN_GC
2440 if (VBOX_FAILURE(rc))
2441 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2442# endif
2443 return rc;
2444
2445#elif PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT
2446
2447 int rc = VINF_SUCCESS;
2448
2449 /*
2450 * Validate input a little bit.
2451 */
2452# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2453 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2454# else
2455 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2456# endif
2457 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2458 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2459 SHWPDE PdeDst = *pPdeDst;
2460
2461 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2462 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2463
2464 GSTPDE PdeSrc;
2465 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2466 PdeSrc.n.u1Present = 1;
2467 PdeSrc.n.u1Write = 1;
2468 PdeSrc.n.u1Accessed = 1;
2469 PdeSrc.n.u1User = 1;
2470
2471 /*
2472 * Allocate & map the page table.
2473 */
2474 PSHWPT pPTDst;
2475 PPGMPOOLPAGE pShwPage;
2476 RTGCPHYS GCPhys;
2477
2478 /* Virtual address = physical address */
2479 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK_32;
2480 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2481
2482 if ( rc == VINF_SUCCESS
2483 || rc == VINF_PGM_CACHED_PAGE)
2484 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2485 else
2486 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2487
2488 PdeDst.u &= X86_PDE_AVL_MASK;
2489 PdeDst.u |= pShwPage->Core.Key;
2490 PdeDst.n.u1Present = 1;
2491 *pPdeDst = PdeDst;
2492
2493 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2494 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2495 return rc;
2496
2497#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2498    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2499 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2500 return VERR_INTERNAL_ERROR;
2501#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2502}
2503
2504
2505
2506/**
2507 * Prefetch a page/set of pages.
2508 *
2509 * Typically used to sync commonly used pages before entering raw mode
2510 * after a CR3 reload.
2511 *
2512 * @returns VBox status code.
2513 * @param pVM VM handle.
2514 * @param   GCPtrPage   Page to prefetch.
2515 */
2516PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2517{
2518#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2519 /*
2520 * Check that all Guest levels thru the PDE are present, getting the
2521 * PD and PDE in the processes.
2522 */
2523 int rc = VINF_SUCCESS;
2524# if PGM_WITH_PAGING(PGM_GST_TYPE)
2525# if PGM_GST_TYPE == PGM_TYPE_32BIT
2526 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2527 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2528# else /* PAE */
2529 unsigned iPDSrc;
2530 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2531 if (!pPDSrc)
2532 return VINF_SUCCESS; /* not present */
2533# endif
2534 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2535# else
2536 PGSTPD pPDSrc = NULL;
2537 const unsigned iPDSrc = 0;
2538 GSTPDE PdeSrc;
2539
2540 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2541 PdeSrc.n.u1Present = 1;
2542 PdeSrc.n.u1Write = 1;
2543 PdeSrc.n.u1Accessed = 1;
2544 PdeSrc.n.u1User = 1;
2545# endif
2546
2547 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2548 {
2549# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2550 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2551# else
2552 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2553# endif
2554 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2555 {
2556 if (!PdeDst.n.u1Present)
2557 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2558 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2559 else
2560 {
2561 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2562 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2563 * makes no sense to prefetch more than one page.
2564 */
2565 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2566 if (VBOX_SUCCESS(rc))
2567 rc = VINF_SUCCESS;
2568 }
2569 }
2570 }
2571 return rc;
2572
2573#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2574
2575 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2576 return VERR_INTERNAL_ERROR;
2577#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2578}
2579
2580
2581
2582
2583/**
2584 * Syncs a page during a PGMVerifyAccess() call.
2585 *
2586 * @returns VBox status code (informational included).
2587 * @param GCPtrPage The address of the page to sync.
2588 * @param fPage The effective guest page flags.
2589 * @param uErr The trap error code.
2590 */
2591PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2592{
2593 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2594
2595#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) && PGM_SHW_TYPE != PGM_TYPE_AMD64
2596
2597# ifndef IN_RING0
2598 if (!(fPage & X86_PTE_US))
2599 {
2600 /*
2601 * Mark this page as safe.
2602 */
2603 /** @todo not correct for pages that contain both code and data!! */
2604 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2605 CSAMMarkPage(pVM, (RTGCPTR)GCPtrPage, true);
2606 }
2607# endif
2608 /*
2609 * Get guest PD and index.
2610 */
2611
2612# if PGM_WITH_PAGING(PGM_GST_TYPE)
2613# if PGM_GST_TYPE == PGM_TYPE_32BIT
2614 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2615 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2616# else /* PAE */
2617 unsigned iPDSrc;
2618 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2619
2620    if (!pPDSrc)
2621 {
2622 Log(("PGMVerifyAccess: access violation for %VGv due to non-present PDPTR\n", GCPtrPage));
2623 return VINF_EM_RAW_GUEST_TRAP;
2624 }
2625# endif
2626# else
2627 PGSTPD pPDSrc = NULL;
2628 const unsigned iPDSrc = 0;
2629# endif
2630 int rc = VINF_SUCCESS;
2631
2632 /*
2633 * First check if the shadow pd is present.
2634 */
2635# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2636 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2637# else
2638 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2639# endif
2640 if (!pPdeDst->n.u1Present)
2641 {
2642 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2643 AssertRC(rc);
2644 if (rc != VINF_SUCCESS)
2645 return rc;
2646 }
2647
2648# if PGM_WITH_PAGING(PGM_GST_TYPE)
2649 /* Check for dirty bit fault */
2650 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2651 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2652 Log(("PGMVerifyAccess: success (dirty)\n"));
2653 else
2654 {
2655 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2656#else
2657 {
2658 GSTPDE PdeSrc;
2659 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2660 PdeSrc.n.u1Present = 1;
2661 PdeSrc.n.u1Write = 1;
2662 PdeSrc.n.u1Accessed = 1;
2663 PdeSrc.n.u1User = 1;
2664
2665#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2666 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2667 if (uErr & X86_TRAP_PF_US)
2668 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2669 else /* supervisor */
2670 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2671
2672 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2673 if (VBOX_SUCCESS(rc))
2674 {
2675 /* Page was successfully synced */
2676 Log2(("PGMVerifyAccess: success (sync)\n"));
2677 rc = VINF_SUCCESS;
2678 }
2679 else
2680 {
2681 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2682 return VINF_EM_RAW_GUEST_TRAP;
2683 }
2684 }
2685 return rc;
2686
2687#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2688
2689    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2690    return VERR_INTERNAL_ERROR;
2691#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2692}
2693
2694
2695#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
2696# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2697/**
2698 * Figures out which kind of shadow page this guest PDE warrants.
2699 *
2700 * @returns Shadow page kind.
2701 * @param pPdeSrc The guest PDE in question.
2702 * @param cr4 The current guest cr4 value.
2703 */
2704DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
2705{
2706 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2707 return BTH_PGMPOOLKIND_PT_FOR_PT;
2708 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2709 //{
2710 // case 0:
2711 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2712 // case X86_PDE4M_RW:
2713 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2714 // case X86_PDE4M_US:
2715 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2716 // case X86_PDE4M_RW | X86_PDE4M_US:
2717 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2718# if 0
2719 // case X86_PDE4M_PAE_NX:
2720 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2721 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2722 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2723 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2724 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2725 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2726 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2727# endif
2728 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2729 //}
2730}
2731# endif
2732#endif
2733
2734#undef MY_STAM_COUNTER_INC
2735#define MY_STAM_COUNTER_INC(a) do { } while (0)
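/* Note: the no-op override above compiles out the MY_STAM_COUNTER_INC statistics used by SyncCR3 below. */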
2736
2737
2738/**
2739 * Syncs the paging hierarchy starting at CR3.
2740 *
2741 * @returns VBox status code, no specials.
2742 * @param pVM The virtual machine.
2743 * @param cr0 Guest context CR0 register
2744 * @param cr3 Guest context CR3 register
2745 * @param cr4 Guest context CR4 register
2746 * @param fGlobal Including global page directories or not
2747 */
2748PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
2749{
2750 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
2751 fGlobal = true; /* Change this CR3 reload to be a global one. */
2752
2753 /*
2754 * Update page access handlers.
2755     * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
2756     * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
2757     * have to look into that later because it will have a bad influence on performance.
2758 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
2759 * bird: Yes, but that won't work for aliases.
2760 */
2761 /** @todo this MUST go away. See #1557. */
2762 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2763 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
2764 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2765
2766#ifdef PGMPOOL_WITH_MONITORING
2767 /*
2768 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2769     * Occasionally we will have to clear all the shadow page tables because we wanted
2770     * to monitor a page which was mapped by too many shadowed page tables. This operation
2771     * is sometimes referred to as a 'lightweight flush'.
2772 */
2773 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2774 pgmPoolMonitorModifiedClearAll(pVM);
2775 else
2776 {
2777# ifdef IN_RING3
2778 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2779 pgmPoolClearAll(pVM);
2780# else
2781 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2782 return VINF_PGM_SYNC_CR3;
2783# endif
2784 }
2785#endif
2786
2787 Assert(fGlobal || (cr4 & X86_CR4_PGE));
2788 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
2789
2790#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
2791 /*
2792 * Get page directory addresses.
2793 */
2794# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2795 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
2796# else /* PGM_SHW_TYPE == PGM_TYPE_PAE */
2797# if PGM_GST_TYPE == PGM_TYPE_32BIT
2798 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
2799# endif
2800# endif
2801
2802# if PGM_GST_TYPE == PGM_TYPE_32BIT
2803 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2804 Assert(pPDSrc);
2805# ifndef IN_GC
2806 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
2807# endif
2808# endif
2809
2810 /*
2811 * Iterate the page directory.
2812 */
2813 PPGMMAPPING pMapping;
2814 unsigned iPdNoMapping;
2815 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
2816 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2817
2818 /* Only check mappings if they are supposed to be put into the shadow page table. */
2819 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
2820 {
2821 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2822 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
2823 }
2824 else
2825 {
2826 pMapping = 0;
2827 iPdNoMapping = ~0U;
2828 }
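    /*
     * iPdNoMapping is the guest PD index at which the next hypervisor mapping starts
     * (~0U when there is none, or when mappings aren't shadowed at all); the loop below
     * compares the current PD index against it to detect and handle conflicts.
     */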
2829# if PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2830 for (unsigned iPDPTE = 0; iPDPTE < GST_PDPE_ENTRIES; iPDPTE++)
2831 {
2832 unsigned iPDSrc;
2833# if PGM_SHW_TYPE == PGM_TYPE_PAE
2834 PX86PDPAE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2835# else
2836 AssertFailed(); /* @todo */
2837 PX86PDPE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[iPDPTE * X86_PG_AMD64_ENTRIES];
2838# endif
2839 PX86PDEPAE pPDEDst = &pPDPAE->a[iPDPTE * X86_PG_PAE_ENTRIES];
2840 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPDPTE << X86_PDPT_SHIFT, &iPDSrc);
2841
2842 if (pPDSrc == NULL)
2843 {
2844 /* PDPT not present */
2845 if (pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present)
2846 {
2847 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2848 {
2849 if ( pPDEDst[iPD].n.u1Present
2850 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
2851 {
2852 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst[iPD].u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPDPTE * X86_PG_PAE_ENTRIES + iPD);
2853 pPDEDst[iPD].u = 0;
2854 }
2855 }
2856 }
2857 if (!(pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].u & PGM_PLXFLAGS_MAPPING))
2858 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present = 0;
2859 continue;
2860 }
2861# else /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
2862 {
2863# endif /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
2864 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2865 {
2866# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2867 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
2868# elif PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2869 AssertMsg(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst, ("%p vs %p\n", &pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512], pPDEDst));
2870# endif
2871 register GSTPDE PdeSrc = pPDSrc->a[iPD];
2872 if ( PdeSrc.n.u1Present
2873 && (PdeSrc.n.u1User || fRawR0Enabled))
2874 {
2875# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2876 || PGM_GST_TYPE == PGM_TYPE_PAE) \
2877 && !defined(PGM_WITHOUT_MAPPINGS)
2878
2879 /*
2880 * Check for conflicts with GC mappings.
2881 */
2882# if PGM_GST_TYPE == PGM_TYPE_PAE
2883 if (iPD + iPDPTE * X86_PG_PAE_ENTRIES == iPdNoMapping)
2884# else
2885 if (iPD == iPdNoMapping)
2886# endif
2887 {
2888 if (pVM->pgm.s.fMappingsFixed)
2889 {
2890 /* It's fixed, just skip the mapping. */
2891 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
2892 iPD += cPTs - 1;
2893 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
2894 pMapping = pMapping->CTXALLSUFF(pNext);
2895 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
2896 continue;
2897 }
2898# ifdef IN_RING3
2899# if PGM_GST_TYPE == PGM_TYPE_32BIT
2900 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
2901# elif PGM_GST_TYPE == PGM_TYPE_PAE
2902 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPDPTE << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
2903# endif
2904 if (VBOX_FAILURE(rc))
2905 return rc;
2906
2907 /*
2908 * Update iPdNoMapping and pMapping.
2909 */
2910 pMapping = pVM->pgm.s.pMappingsR3;
2911 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
2912 pMapping = pMapping->pNextR3;
2913 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
2914# else
2915 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2916 return VINF_PGM_SYNC_CR3;
2917# endif
2918 }
2919# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
2920 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2921# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
2922 /*
2923 * Sync page directory entry.
2924 *
2925                      * The current approach is to allocate the page table but to set
2926                      * the entry to not-present and postpone the page table syncing till
2927                      * it's actually used.
2928 */
2929# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2930 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2931# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2932 const unsigned iPdShw = iPD + iPDPTE * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
2933# else
2934 const unsigned iPdShw = iPD; NOREF(iPdShw);
2935# endif
2936 {
2937 SHWPDE PdeDst = *pPDEDst;
2938 if (PdeDst.n.u1Present)
2939 {
2940 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2941 RTGCPHYS GCPhys;
2942 if ( !PdeSrc.b.u1Size
2943 || !(cr4 & X86_CR4_PSE))
2944 {
2945 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2946# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2947 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2948 GCPhys |= i * (PAGE_SIZE / 2);
2949# endif
2950 }
2951 else
2952 {
2953 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2954# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2955 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2956 GCPhys |= i * X86_PAGE_2M_SIZE;
2957# endif
2958 }
2959
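                        /*
                         * Decide whether the existing shadow page table can be kept: it must still map
                         * the same guest physical address, be of the kind this PDE warrants, be cached
                         * (or exempt from a non-global flush), and its U/S and R/W (or big-page
                         * dirty-tracking) attributes must still match the guest PDE.  Otherwise it is
                         * freed and the shadow PDE cleared so SyncPT can rebuild it on demand.
                         */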
2960 if ( pShwPage->GCPhys == GCPhys
2961 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
2962 && ( pShwPage->fCached
2963 || ( !fGlobal
2964 && ( false
2965# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
2966 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2967 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
2968 || ( !pShwPage->fSeenNonGlobal
2969 && (cr4 & X86_CR4_PGE))
2970# endif
2971 )
2972 )
2973 )
2974 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
2975 || ( (cr4 & X86_CR4_PSE)
2976 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
2977 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
2978 )
2979 )
2980 {
2981# ifdef VBOX_WITH_STATISTICS
2982 if ( !fGlobal
2983 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2984 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
2985 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
2986 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
2987 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
2988 else
2989 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
2990# endif /* VBOX_WITH_STATISTICS */
2991 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
2992 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
2993 //# ifdef PGMPOOL_WITH_CACHE
2994 // pgmPoolCacheUsed(pPool, pShwPage);
2995 //# endif
2996 }
2997 else
2998 {
2999 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3000 pPDEDst->u = 0;
3001 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
3002 }
3003 }
3004 else
3005 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
3006 pPDEDst++;
3007 }
3008 }
3009# if PGM_GST_TYPE == PGM_TYPE_PAE
3010 else if (iPD + iPDPTE * X86_PG_PAE_ENTRIES != iPdNoMapping)
3011# else
3012 else if (iPD != iPdNoMapping)
3013# endif
3014 {
3015 /*
3016 * Check if there is any page directory to mark not present here.
3017 */
3018# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3019 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3020# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3021 const unsigned iPdShw = iPD + iPDPTE * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3022# else
3023 const unsigned iPdShw = iPD; NOREF(iPdShw);
3024# endif
3025 {
3026 if (pPDEDst->n.u1Present)
3027 {
3028 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
3029 pPDEDst->u = 0;
3030 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
3031 }
3032 pPDEDst++;
3033 }
3034 }
3035 else
3036 {
3037# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3038 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3039 && !defined(PGM_WITHOUT_MAPPINGS)
3040
3041 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3042
3043 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3044 if (pVM->pgm.s.fMappingsFixed)
3045 {
3046 /* It's fixed, just skip the mapping. */
3047 pMapping = pMapping->CTXALLSUFF(pNext);
3048 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3049 }
3050 else
3051 {
3052 /*
3053 * Check for conflicts for subsequent pagetables
3054 * and advance to the next mapping.
3055 */
3056 iPdNoMapping = ~0U;
3057 unsigned iPT = cPTs;
3058 while (iPT-- > 1)
3059 {
3060 if ( pPDSrc->a[iPD + iPT].n.u1Present
3061 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3062 {
3063# ifdef IN_RING3
3064# if PGM_GST_TYPE == PGM_TYPE_32BIT
3065 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3066# elif PGM_GST_TYPE == PGM_TYPE_PAE
3067 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPDPTE << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3068# endif
3069 if (VBOX_FAILURE(rc))
3070 return rc;
3071
3072 /*
3073 * Update iPdNoMapping and pMapping.
3074 */
3075 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
3076 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3077 pMapping = pMapping->CTXALLSUFF(pNext);
3078 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3079 break;
3080# else
3081 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3082 return VINF_PGM_SYNC_CR3;
3083# endif
3084 }
3085 }
3086 if (iPdNoMapping == ~0U && pMapping)
3087 {
3088 pMapping = pMapping->CTXALLSUFF(pNext);
3089 if (pMapping)
3090 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3091 }
3092 }
3093
3094 /* advance. */
3095 iPD += cPTs - 1;
3096 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
3097# if PGM_GST_TYPE != PGM_SHW_TYPE
3098 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3099# endif
3100# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3101 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3102# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3103 }
3104
3105 } /* for iPD */
3106 } /* for each PDPTE (PAE) */
3107
3108 return VINF_SUCCESS;
3109
3110#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3111//# error not implemented
3112 return VERR_INTERNAL_ERROR;
3113#else /* guest real and protected mode */
3114 return VINF_SUCCESS;
3115#endif
3116}
3117
3118
3119
3120
3121#ifdef VBOX_STRICT
3122#ifdef IN_GC
3123# undef AssertMsgFailed
3124# define AssertMsgFailed Log
3125#endif
3126#ifdef IN_RING3
3127# include <VBox/dbgf.h>
3128
3129/**
3130 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3131 *
3132 * @returns VBox status code (VINF_SUCCESS).
3133 * @param pVM The VM handle.
3134 * @param cr3 The root of the hierarchy.
3135 * @param cr4 The cr4, only PAE and PSE are currently used.
3136 * @param fLongMode Set if long mode, false if not long mode.
3137 * @param cMaxDepth Number of levels to dump.
3138 * @param pHlp Pointer to the output functions.
3139 */
3140__BEGIN_DECLS
3141PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3142__END_DECLS
3143
3144#endif
3145
3146/**
3147 * Checks that the shadow page table is in sync with the guest one.
3148 *
3149 * @returns The number of errors.
3150 * @param pVM The virtual machine.
3151 * @param cr3 Guest context CR3 register
3152 * @param cr4 Guest context CR4 register
3153 * @param GCPtr Where to start. Defaults to 0.
3154 * @param cb How much to check. Defaults to everything.
3155 */
3156PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
3157{
3158 unsigned cErrors = 0;
3159
3160#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3161 || PGM_GST_TYPE == PGM_TYPE_PAE
3162
3163 PPGM pPGM = &pVM->pgm.s;
3164 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3165 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3166# ifndef IN_RING0
3167 RTHCPHYS HCPhys; /* general usage. */
3168# endif
3169 int rc;
3170
3171 /*
3172 * Check that the Guest CR3 and all its mappings are correct.
3173 */
3174 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3175 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3176 false);
3177# ifndef IN_RING0
3178# if PGM_GST_TYPE == PGM_TYPE_32BIT
3179 rc = PGMShwGetPage(pVM, pPGM->pGuestPDGC, NULL, &HCPhysShw);
3180# else
3181 rc = PGMShwGetPage(pVM, pPGM->pGstPaePDPTGC, NULL, &HCPhysShw);
3182# endif
3183 AssertRCReturn(rc, 1);
3184 HCPhys = NIL_RTHCPHYS;
3185 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3186 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhysShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
3187# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3188 RTGCPHYS GCPhys;
3189 rc = PGMR3DbgHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
3190 AssertRCReturn(rc, 1);
3191 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
3192# endif
3193# endif /* !IN_RING0 */
3194
3195# if PGM_GST_TYPE == PGM_TYPE_32BIT
3196 const GSTPD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
3197# endif
3198
3199 /*
3200 * Get and check the Shadow CR3.
3201 */
3202# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3203 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
3204 unsigned cPDEs = ELEMENTS(pPDDst->a);
3205# else
3206 const X86PDPAE *pPDDst = pPGM->CTXMID(ap,PaePDs[0]); /* use it as a 2048 entry PD */
3207 unsigned cPDEs = ELEMENTS(pPDDst->a) * ELEMENTS(pPGM->apHCPaePDs);
3208# endif
3209 if (cb != ~(RTGCUINTPTR)0)
3210 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3211
3212/** @todo call the other two PGMAssert*() functions. */
3213
3214# if PGM_GST_TYPE == PGM_TYPE_PAE
3215 /*
3216 * Check the 4 PDPTs too.
3217 */
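 /* Each cached guest PAE PD pointer (apGstPaePDsHC) and its physical address (aGCPhysGstPaePDs)
  must still match what the corresponding guest PDPT entry points to. */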
3218 for (unsigned i = 0; i < 4; i++)
3219 {
3220 RTHCPTR HCPtr;
3221 RTHCPHYS HCPhys;
3222 RTGCPHYS GCPhys = pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[i].u & X86_PDPE_PG_MASK;
3223 int rc2 = pgmRamGCPhys2HCPtrAndHCPhysWithFlags(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
3224 if (VBOX_SUCCESS(rc2))
3225 {
3226 AssertMsg( pVM->pgm.s.apGstPaePDsHC[i] == (R3R0PTRTYPE(PX86PDPAE))HCPtr
3227 && pVM->pgm.s.aGCPhysGstPaePDs[i] == GCPhys,
3228 ("idx %d apGstPaePDsHC %VHv vs %VHv aGCPhysGstPaePDs %VGp vs %VGp\n",
3229 i, pVM->pgm.s.apGstPaePDsHC[i], HCPtr, pVM->pgm.s.aGCPhysGstPaePDs[i], GCPhys));
3230 }
3231 }
3232# endif
3233
3234 /*
3235 * Iterate the shadow page directory.
3236 */
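 /* Round GCPtr down to a shadow PDE boundary and walk the shadow PD one entry at a time;
  each iteration advances GCPtr by the address range a single shadow PDE covers (_4G / cPDEs). */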
3237 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3238 unsigned iPDDst = GCPtr >> SHW_PD_SHIFT;
3239 cPDEs += iPDDst;
3240 for (;
3241 iPDDst < cPDEs;
3242 iPDDst++, GCPtr += _4G / cPDEs)
3243 {
3244# if PGM_GST_TYPE == PGM_TYPE_PAE
3245 uint32_t iPDSrc;
3246 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pPGM, (RTGCUINTPTR)GCPtr, &iPDSrc);
3247 if (!pPDSrc)
3248 {
3249 AssertMsg(!pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtr >> GST_PDPT_SHIFT) & GST_PDPT_MASK].n.u1Present, ("Guest PDPTE not present; guest PDPTE=%VX64\n", pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtr >> GST_PDPT_SHIFT) & GST_PDPT_MASK].u));
3250 continue;
3251 }
3252# endif
3253
3254 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3255 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3256 {
3257 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3258 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3259 {
3260 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3261 cErrors++;
3262 continue;
3263 }
3264 }
3265 else if ( (PdeDst.u & X86_PDE_P)
3266 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3267 )
3268 {
3269 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3270 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3271 if (!pPoolPage)
3272 {
3273 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
3274 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3275 cErrors++;
3276 continue;
3277 }
3278 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3279
3280 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3281 {
3282 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
3283 GCPtr, (uint64_t)PdeDst.u));
3284 cErrors++;
3285 }
3286
3287 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3288 {
3289 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
3290 GCPtr, (uint64_t)PdeDst.u));
3291 cErrors++;
3292 }
3293
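 /* Translate the shadow PD index back to the guest PD index; with a 32-bit guest on a PAE
  shadow two shadow PDEs cover one guest PDE, otherwise the indices map 1:1. */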
3294 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3295 if (!PdeSrc.n.u1Present)
3296 {
3297 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3298 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3299 cErrors++;
3300 continue;
3301 }
3302
3303 if ( !PdeSrc.b.u1Size
3304 || !(cr4 & X86_CR4_PSE))
3305 {
3306 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3307# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3308 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3309# endif
3310 }
3311 else
3312 {
3313# if PGM_GST_TYPE == PGM_TYPE_32BIT
3314 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3315 {
3316 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3317 GCPtr, (uint64_t)PdeSrc.u));
3318 cErrors++;
3319 continue;
3320 }
3321# endif
3322 GCPhysGst = PdeSrc.u & GST_PDE_BIG_PG_MASK;
3323# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3324 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3325# endif
3326 }
3327
3328 if ( pPoolPage->enmKind
3329 != (!PdeSrc.b.u1Size || !(cr4 & X86_CR4_PSE) ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3330 {
3331 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3332 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3333 cErrors++;
3334 }
3335
3336 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3337 if (!pPhysPage)
3338 {
3339 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3340 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3341 cErrors++;
3342 continue;
3343 }
3344
3345 if (GCPhysGst != pPoolPage->GCPhys)
3346 {
3347 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3348 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3349 cErrors++;
3350 continue;
3351 }
3352
3353 if ( !PdeSrc.b.u1Size
3354 || !(cr4 & X86_CR4_PSE))
3355 {
3356 /*
3357 * Page Table.
3358 */
3359 const GSTPT *pPTSrc;
3360 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3361 if (VBOX_FAILURE(rc))
3362 {
3363 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3364 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3365 cErrors++;
3366 continue;
3367 }
3368 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3369 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3370 {
3371 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3372 // (This problem will go away when/if we shadow multiple CR3s.)
3373 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3374 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3375 cErrors++;
3376 continue;
3377 }
3378 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3379 {
3380 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3381 GCPtr, (uint64_t)PdeDst.u));
3382 cErrors++;
3383 continue;
3384 }
3385
3386 /* iterate the page table. */
3387# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3388 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3389 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3390# else
3391 const unsigned offPTSrc = 0;
3392# endif
3393 for (unsigned iPT = 0, off = 0;
3394 iPT < ELEMENTS(pPTDst->a);
3395 iPT++, off += PAGE_SIZE)
3396 {
3397 const SHWPTE PteDst = pPTDst->a[iPT];
3398
3399 /* skip not-present entries. */
3400 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3401 continue;
3402 Assert(PteDst.n.u1Present);
3403
3404 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3405 if (!PteSrc.n.u1Present)
3406 {
3407# ifdef IN_RING3
3408 PGMAssertHandlerAndFlagsInSync(pVM);
3409 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3410# endif
3411 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3412 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3413 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3414 cErrors++;
3415 continue;
3416 }
3417
3418 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3419# if 1 /** @todo sync accessed bit properly... */
3420 fIgnoreFlags |= X86_PTE_A;
3421# endif
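 /* Mask out the page frame bits and the flags the shadow PTE doesn't mirror
  (AVL, G, D, PWT, PCD, PAT, and for now A) before comparing guest and shadow entries. */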
3422
3423 /* match the physical addresses */
3424 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3425 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3426
3427# ifdef IN_RING3
3428 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3429 if (VBOX_FAILURE(rc))
3430 {
3431 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3432 {
3433 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3434 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3435 cErrors++;
3436 continue;
3437 }
3438 }
3439 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3440 {
3441 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3442 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3443 cErrors++;
3444 continue;
3445 }
3446# endif
3447
3448 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3449 if (!pPhysPage)
3450 {
3451# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3452 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3453 {
3454 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3455 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3456 cErrors++;
3457 continue;
3458 }
3459# endif
3460 if (PteDst.n.u1Write)
3461 {
3462 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3463 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3464 cErrors++;
3465 }
3466 fIgnoreFlags |= X86_PTE_RW;
3467 }
3468 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
3469 {
3470 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3471 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3472 cErrors++;
3473 continue;
3474 }
3475
3476 /* flags */
3477 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3478 {
3479 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3480 {
3481 if (PteDst.n.u1Write)
3482 {
3483 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3484 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3485 cErrors++;
3486 continue;
3487 }
3488 fIgnoreFlags |= X86_PTE_RW;
3489 }
3490 else
3491 {
3492 if (PteDst.n.u1Present)
3493 {
3494 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3495 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3496 cErrors++;
3497 continue;
3498 }
3499 fIgnoreFlags |= X86_PTE_P;
3500 }
3501 }
3502 else
3503 {
3504 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3505 {
3506 if (PteDst.n.u1Write)
3507 {
3508 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3509 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3510 cErrors++;
3511 continue;
3512 }
3513 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3514 {
3515 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3516 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3517 cErrors++;
3518 continue;
3519 }
3520 if (PteDst.n.u1Dirty)
3521 {
3522 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3523 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3524 cErrors++;
3525 }
3526# if 0 /** @todo sync access bit properly... */
3527 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3528 {
3529 AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3530 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3531 cErrors++;
3532 }
3533 fIgnoreFlags |= X86_PTE_RW;
3534# else
3535 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3536# endif
3537 }
3538 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3539 {
3540 /* access bit emulation (not implemented). */
3541 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3542 {
3543 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3544 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3545 cErrors++;
3546 continue;
3547 }
3548 if (!PteDst.n.u1Accessed)
3549 {
3550 AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3551 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3552 cErrors++;
3553 }
3554 fIgnoreFlags |= X86_PTE_P;
3555 }
3556# ifdef DEBUG_sandervl
3557 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3558# endif
3559 }
3560
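 /* Tolerate a read-only shadow PTE for a writable guest PTE (e.g. a lazily deregistered
  write handler); a writable shadow PTE for a read-only guest PTE is still flagged. */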
3561 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3562 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3563 )
3564 {
3565 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3566 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3567 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3568 cErrors++;
3569 continue;
3570 }
3571 } /* foreach PTE */
3572 }
3573 else
3574 {
3575 /*
3576 * Big Page.
3577 */
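 /* The guest maps a 2/4 MB page which the shadow emulates with a 4 KB page table, so the guest
  PDE flags are checked against the shadow PDE below and then against every shadow PTE;
  frame bits and the AVL/G/D/PS/PWT/PCD flags are ignored. */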
3578 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3579 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3580 {
3581 if (PdeDst.n.u1Write)
3582 {
3583 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3584 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3585 cErrors++;
3586 continue;
3587 }
3588 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3589 {
3590 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3591 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3592 cErrors++;
3593 continue;
3594 }
3595# if 0 /** @todo sync access bit properly... */
3596 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3597 {
3598 AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3599 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3600 cErrors++;
3601 }
3602 fIgnoreFlags |= X86_PTE_RW;
3603# else
3604 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3605# endif
3606 }
3607 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3608 {
3609 /* access bit emulation (not implemented). */
3610 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3611 {
3612 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3613 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3614 cErrors++;
3615 continue;
3616 }
3617 if (!PdeDst.n.u1Accessed)
3618 {
3619 AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3620 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3621 cErrors++;
3622 }
3623 fIgnoreFlags |= X86_PTE_P;
3624 }
3625
3626 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3627 {
3628 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3629 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3630 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3631 cErrors++;
3632 }
3633
3634 /* iterate the page table. */
3635 for (unsigned iPT = 0, off = 0;
3636 iPT < ELEMENTS(pPTDst->a);
3637 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3638 {
3639 const SHWPTE PteDst = pPTDst->a[iPT];
3640
3641 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3642 {
3643 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3644 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3645 cErrors++;
3646 }
3647
3648 /* skip not-present entries. */
3649 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3650 continue;
3651
3652 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3653
3654 /* match the physical addresses */
3655 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3656
3657# ifdef IN_RING3
3658 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3659 if (VBOX_FAILURE(rc))
3660 {
3661 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3662 {
3663 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3664 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3665 cErrors++;
3666 }
3667 }
3668 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3669 {
3670 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3671 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3672 cErrors++;
3673 continue;
3674 }
3675# endif
3676
3677 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3678 if (!pPhysPage)
3679 {
3680# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3681 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3682 {
3683 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3684 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3685 cErrors++;
3686 continue;
3687 }
3688# endif
3689 if (PteDst.n.u1Write)
3690 {
3691 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3692 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3693 cErrors++;
3694 }
3695 fIgnoreFlags |= X86_PTE_RW;
3696 }
3697 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
3698 {
3699 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3700 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3701 cErrors++;
3702 continue;
3703 }
3704
3705 /* flags */
3706 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3707 {
3708 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3709 {
3710 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
3711 {
3712 if (PteDst.n.u1Write)
3713 {
3714 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3715 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3716 cErrors++;
3717 continue;
3718 }
3719 fIgnoreFlags |= X86_PTE_RW;
3720 }
3721 }
3722 else
3723 {
3724 if (PteDst.n.u1Present)
3725 {
3726 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3727 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3728 cErrors++;
3729 continue;
3730 }
3731 fIgnoreFlags |= X86_PTE_P;
3732 }
3733 }
3734
3735 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3736 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
3737 )
3738 {
3739 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
3740 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3741 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3742 cErrors++;
3743 continue;
3744 }
3745 } /* foreach PTE */
3746 }
3747 }
3748 /* not present */
3749
3750 } /* foreach PDE */
3751
3752# ifdef DEBUG
3753 if (cErrors)
3754 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
3755# endif
3756
3757#elif PGM_GST_TYPE == PGM_TYPE_PAE
3758//# error not implemented
3759
3760
3761#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3762//# error not implemented
3763
3764/*#else: guest real and protected mode */
3765#endif
3766 return cErrors;
3767}
3768#endif /* VBOX_STRICT */
3769