VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@9026

Last change on this file since 9026 was 9021, checked in by vboxsync, 17 years ago

Nested paging updates. Extra paging mode added to prevent illegal changes to the shadow page table.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 163.7 KB
1/* $Id: PGMAllBth.h 9021 2008-05-21 14:38:13Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42__END_DECLS
43
44
45/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
46#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED
47# error "Invalid combination; PAE guest implies PAE shadow"
48#endif
49
50#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
51 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED)
52# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
53#endif
54
55#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
56 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED)
57# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED) \
61 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
62# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
63#endif
64
65#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
66# define PGM_WITHOUT_MAPPINGS
67#endif
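/*
 * Note: this header is a paging template and is compiled once for every
 * supported guest/shadow paging mode pair. The includer defines PGM_GST_TYPE,
 * PGM_SHW_TYPE and the PGM_BTH_DECL/PGM_BTH_NAME macros before pulling it in
 * (see PGMInternal.h and the PGMAll/PGMGC/PGMR0 code that includes it). The
 * sketch below only illustrates the idea; the exact macro names and values
 * used by the real includers may differ:
 *
 *     #define PGM_GST_TYPE        PGM_TYPE_32BIT
 *     #define PGM_SHW_TYPE        PGM_TYPE_PAE
 *     #define PGM_BTH_NAME(name)  PGM_BTH_NAME_PAE_32BIT(name)
 *     #include "PGMAllBth.h"
 *     #undef PGM_BTH_NAME
 *     #undef PGM_SHW_TYPE
 *     #undef PGM_GST_TYPE
 */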
68
69/**
70 * #PF Handler for raw-mode guest execution.
71 *
72 * @returns VBox status code (appropriate for trap handling and GC return).
73 * @param pVM VM Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 */
78PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
79{
80#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
81 && PGM_SHW_TYPE != PGM_TYPE_NESTED
82
83# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
84 /*
85 * Hide the instruction fetch trap indicator for now.
86 */
87 /** @todo NXE will change this and we must fix NXE in the switcher too! */
88 if (uErr & X86_TRAP_PF_ID)
89 {
90 uErr &= ~X86_TRAP_PF_ID;
91 TRPMSetErrorCode(pVM, uErr);
92 }
93# endif
94
95 /*
96 * Get PDs.
97 */
98 int rc;
99# if PGM_WITH_PAGING(PGM_GST_TYPE)
100# if PGM_GST_TYPE == PGM_TYPE_32BIT
101 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
102 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
103
104# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
105
106# if PGM_GST_TYPE == PGM_TYPE_PAE
107 unsigned iPDSrc;
108 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, (RTGCUINTPTR)pvFault, &iPDSrc);
109
110# elif PGM_GST_TYPE == PGM_TYPE_AMD64
111 unsigned iPDSrc;
112 PX86PML4E pPml4e;
113 X86PDPE Pdpe;
114 PGSTPD pPDSrc;
115
116 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4e, &Pdpe, &iPDSrc);
117 Assert(pPml4e);
118# endif
119 /* Quick check for a valid guest trap. */
120 if (!pPDSrc)
121 {
122 LogFlow(("Trap0eHandler: guest PDPTR not present CR3=%VGp\n", (CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK)));
123 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eGuestTrap; });
124 TRPMSetErrorCode(pVM, uErr);
125 return VINF_EM_RAW_GUEST_TRAP;
126 }
127# endif
128# else
129 PGSTPD pPDSrc = NULL;
130 const unsigned iPDSrc = 0;
131# endif
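 /* At this point iPDSrc/pPDSrc identify the guest PDE covering pvFault; for guests running without paging they are dummies (0/NULL) and a fake PDE is constructed further down. */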
132
133# if PGM_SHW_TYPE == PGM_TYPE_32BIT
134 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
135 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
136# elif PGM_SHW_TYPE == PGM_TYPE_PAE
137 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
138 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries, so no need to and with SHW_PD_MASK to get iPDDst */
139
140# if PGM_GST_TYPE == PGM_TYPE_PAE
141 /* Did we mark the PDPT as not present in SyncCR3? */
142 unsigned iPDPTE = ((RTGCUINTPTR)pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
143 if (!pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present)
144 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present = 1;
145
146# endif
147
148# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
149 const unsigned iPDDst = (((RTGCUINTPTR)pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
150 PX86PDPAE pPDDst;
151
152 rc = PGMShwGetLongModePDPtr(pVM, (RTGCUINTPTR)pvFault, &pPDDst);
153 if (rc != VINF_SUCCESS)
154 {
155 AssertMsg(rc == VINF_PGM_SYNC_CR3, ("Unexpected rc=%Vrc\n", rc));
156 return rc;
157 }
158 Assert(pPDDst);
159# endif
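 /* iPDDst/pPDDst now address the shadow page directory entry that covers pvFault in the active shadow paging mode. */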
160
161# if PGM_WITH_PAGING(PGM_GST_TYPE)
162 /*
163 * If we successfully correct the write protection fault due to dirty bit
164 * tracking, or this page fault is a genuine one, then return immediately.
165 */
166 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
167 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
168 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
169 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
170 || rc == VINF_EM_RAW_GUEST_TRAP)
171 {
172 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
173 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
174 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
175 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
176 }
177
178 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
179# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
180
181 /*
182 * A common case is the not-present error caused by lazy page table syncing.
183 *
184 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
185 * so we can safely assume that the shadow PT is present when calling SyncPage later.
186 *
187 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
188 * of mapping conflict and defer to SyncCR3 in R3.
189 * (Again, we do NOT support access handlers for non-present guest pages.)
190 *
191 */
192# if PGM_WITH_PAGING(PGM_GST_TYPE)
193 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
194# else
195 GSTPDE PdeSrc;
196 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
197 PdeSrc.n.u1Present = 1;
198 PdeSrc.n.u1Write = 1;
199 PdeSrc.n.u1Accessed = 1;
200 PdeSrc.n.u1User = 1;
201# endif
202 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
203 && !pPDDst->a[iPDDst].n.u1Present
204 && PdeSrc.n.u1Present
205 )
206
207 {
208 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
209 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
210 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
211 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
212 if (VBOX_SUCCESS(rc))
213 {
214 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
215 return rc;
216 }
217 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
218 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
219 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
220 return VINF_PGM_SYNC_CR3;
221 }
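 /* Not a lazy page-table sync; carry on with the mapping and access handler checks below. */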
222
223# if PGM_WITH_PAGING(PGM_GST_TYPE)
224 /*
225 * Check if this address is within any of our mappings.
226 *
227 * This is *very* fast and it's gonna save us a bit of effort below and prevent
228 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
229 * (BTW, it's impossible to have physical access handlers in a mapping.)
230 */
231 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
232 {
233 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
234 PPGMMAPPING pMapping = CTXALLSUFF(pVM->pgm.s.pMappings);
235 for ( ; pMapping; pMapping = CTXALLSUFF(pMapping->pNext))
236 {
237 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
238 break;
239 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
240 {
241 /*
242 * The first thing we check is if we've got an undetected conflict.
243 */
244 if (!pVM->pgm.s.fMappingsFixed)
245 {
246 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
247 while (iPT-- > 0)
248 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
249 {
250 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
251 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
252 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync,right? */
253 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
254 return VINF_PGM_SYNC_CR3;
255 }
256 }
257
258 /*
259 * Check if the fault address is in a virtual page access handler range.
260 */
261 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->HyperVirtHandlers, pvFault);
262 if ( pCur
263 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
264 && uErr & X86_TRAP_PF_RW)
265 {
266# ifdef IN_GC
267 STAM_PROFILE_START(&pCur->Stat, h);
268 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
269 STAM_PROFILE_STOP(&pCur->Stat, h);
270# else
271 AssertFailed();
272 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
273# endif
274 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
275 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
276 return rc;
277 }
278
279 /*
280 * Pretend we're not here and let the guest handle the trap.
281 */
282 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
283 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
284 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
285 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
286 return VINF_EM_RAW_GUEST_TRAP;
287 }
288 }
289 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
290 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
291# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
292
293 /*
294 * Check if this fault address is flagged for special treatment,
295 * which means we'll have to figure out the physical address and
296 * check flags associated with it.
297 *
298 * ASSUME that we can limit any special access handling to pages
299 * in page tables which the guest believes to be present.
300 */
301 if (PdeSrc.n.u1Present)
302 {
303 RTGCPHYS GCPhys = NIL_RTGCPHYS;
304
305# if PGM_WITH_PAGING(PGM_GST_TYPE)
306 uint32_t cr4 = CPUMGetGuestCR4(pVM);
307 if ( PdeSrc.b.u1Size
308 && (cr4 & X86_CR4_PSE))
309 GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK)
310 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
311 else
312 {
313 PGSTPT pPTSrc;
314 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
315 if (VBOX_SUCCESS(rc))
316 {
317 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
318 if (pPTSrc->a[iPTESrc].n.u1Present)
319 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
320 }
321 }
322# else
323 /* No paging so the fault address is the physical address */
324 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
325# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
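 /* GCPhys now holds the page-aligned guest physical address behind pvFault, or NIL_RTGCPHYS if the guest translation isn't present. */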
326
327 /*
328 * If we have a GC address we'll check if it has any flags set.
329 */
330 if (GCPhys != NIL_RTGCPHYS)
331 {
332 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
333
334 PPGMPAGE pPage;
335 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
336 if (VBOX_SUCCESS(rc))
337 {
338 if (PGM_PAGE_HAS_ANY_HANDLERS(pPage))
339 {
340 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
341 {
342 /*
343 * Physical page access handler.
344 */
345 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
346 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
347 if (pCur)
348 {
349# ifdef PGM_SYNC_N_PAGES
350 /*
351 * If the region is write protected and we got a page not present fault, then sync
352 * the pages. If the fault was caused by a read, then restart the instruction.
353 * In case of write access continue to the GC write handler.
354 *
355 * ASSUMES that there is only one handler per page or that they have similar write properties.
356 */
357 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
358 && !(uErr & X86_TRAP_PF_P))
359 {
360 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
361 if ( VBOX_FAILURE(rc)
362 || !(uErr & X86_TRAP_PF_RW)
363 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
364 {
365 AssertRC(rc);
366 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
367 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
368 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
369 return rc;
370 }
371 }
372# endif
373
374 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
375 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
376 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
377
378#if defined(IN_GC) || defined(IN_RING0)
379 if (CTXALLSUFF(pCur->pfnHandler))
380 {
381 STAM_PROFILE_START(&pCur->Stat, h);
382 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
383 STAM_PROFILE_STOP(&pCur->Stat, h);
384 }
385 else
386#endif
387 rc = VINF_EM_RAW_EMULATE_INSTR;
388 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
389 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
390 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
391 return rc;
392 }
393 }
394# if PGM_WITH_PAGING(PGM_GST_TYPE)
395 else
396 {
397# ifdef PGM_SYNC_N_PAGES
398 /*
399 * If the region is write protected and we got a page not present fault, then sync
400 * the pages. If the fault was caused by a read, then restart the instruction.
401 * In case of write access continue to the GC write handler.
402 */
403 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
404 && !(uErr & X86_TRAP_PF_P))
405 {
406 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
407 if ( VBOX_FAILURE(rc)
408 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
409 || !(uErr & X86_TRAP_PF_RW))
410 {
411 AssertRC(rc);
412 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
413 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
414 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
415 return rc;
416 }
417 }
418# endif
419 /*
420 * Ok, it's a virtual page access handler.
421 *
422 * Since it's faster to search by address, we'll do that first
423 * and then retry by GCPhys if that fails.
424 */
425 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
426 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
427 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
428 */
429 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
430 if (pCur)
431 {
432 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
433 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
434 || !(uErr & X86_TRAP_PF_P)
435 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
436 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
437
438 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
439 && ( uErr & X86_TRAP_PF_RW
440 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
441 {
442# ifdef IN_GC
443 STAM_PROFILE_START(&pCur->Stat, h);
444 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
445 STAM_PROFILE_STOP(&pCur->Stat, h);
446# else
447 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
448# endif
449 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
450 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
451 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
452 return rc;
453 }
454 /* Unhandled part of a monitored page */
455 }
456 else
457 {
458 /* Check by physical address. */
459 PPGMVIRTHANDLER pCur;
460 unsigned iPage;
461 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
462 &pCur, &iPage);
463 Assert(VBOX_SUCCESS(rc) || !pCur);
464 if ( pCur
465 && ( uErr & X86_TRAP_PF_RW
466 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
467 {
468 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
469# ifdef IN_GC
470 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
471 Assert(off < pCur->cb);
472 STAM_PROFILE_START(&pCur->Stat, h);
473 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
474 STAM_PROFILE_STOP(&pCur->Stat, h);
475# else
476 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
477# endif
478 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
479 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
480 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
481 return rc;
482 }
483 }
484 }
485# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
486
487 /*
488 * There is a handled area of the page, but this fault doesn't belong to it.
489 * We must emulate the instruction.
490 *
491 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
492 * we first check if this was a page-not-present fault for a page with only
493 * write access handlers. Restart the instruction if it wasn't a write access.
494 */
495 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
496
497 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
498 && !(uErr & X86_TRAP_PF_P))
499 {
500 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
501 if ( VBOX_FAILURE(rc)
502 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
503 || !(uErr & X86_TRAP_PF_RW))
504 {
505 AssertRC(rc);
506 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
507 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
508 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
509 return rc;
510 }
511 }
512
513 /** @todo This particular case can cause quite a lot of overhead, e.g. the early stage of kernel booting in Ubuntu 6.06,
514 * which writes to an unhandled part of the LDT page several million times.
515 */
516 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
517 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%RHp%s%s\n",
518 rc, pPage->HCPhys,
519 PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage) ? " phys" : "",
520 PGM_PAGE_HAS_ANY_VIRTUAL_HANDLERS(pPage) ? " virt" : ""));
521 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
522 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
523 return rc;
524 } /* if any kind of handler */
525
526# if PGM_WITH_PAGING(PGM_GST_TYPE)
527 if (uErr & X86_TRAP_PF_P)
528 {
529 /*
530 * The page isn't marked, but it might still be monitored by a virtual page access handler.
531 * (ASSUMES no temporary disabling of virtual handlers.)
532 */
533 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
534 * we should correct both the shadow page table and physical memory flags, and not only check for
535 * accesses within the handler region but for access to pages with virtual handlers. */
536 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
537 if (pCur)
538 {
539 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
540 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
541 || !(uErr & X86_TRAP_PF_P)
542 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
543 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
544
545 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
546 && ( uErr & X86_TRAP_PF_RW
547 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
548 {
549# ifdef IN_GC
550 STAM_PROFILE_START(&pCur->Stat, h);
551 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
552 STAM_PROFILE_STOP(&pCur->Stat, h);
553# else
554 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
555# endif
556 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
557 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
558 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
559 return rc;
560 }
561 }
562 }
563# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
564 }
565 else
566 {
567 /* When the guest accesses invalid physical memory (e.g. probing of RAM or accessing a remapped MMIO range), we fall
568 * back to the recompiler to emulate the instruction.
569 */
570 LogFlow(("pgmPhysGetPageEx %VGp failed with %Vrc\n", GCPhys, rc));
571 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersInvalid);
572 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
573 return VINF_EM_RAW_EMULATE_INSTR;
574 }
575
576 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
577
578# ifdef PGM_OUT_OF_SYNC_IN_GC
579 /*
580 * We only get here if the page is present in the guest page tables and the trap is not
581 * handled by our handlers.
582 * Check for an out-of-sync page.
583 */
584 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
585
586 if (!(uErr & X86_TRAP_PF_P))
587 {
588 /*
589 * Page is not present in our page tables.
590 * Try to sync it!
591 * BTW, fPageShw is invalid in this branch!
592 */
593 if (uErr & X86_TRAP_PF_US)
594 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
595 else /* supervisor */
596 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
597
598# if defined(LOG_ENABLED) && !defined(IN_RING0)
599 RTGCPHYS GCPhys;
600 uint64_t fPageGst;
601 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
602 Log(("Page out of sync: %p eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
603 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)));
604# endif /* LOG_ENABLED */
605
606# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
607 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
608 {
609 uint64_t fPageGst;
610 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
611 if ( VBOX_SUCCESS(rc)
612 && !(fPageGst & X86_PTE_US))
613 {
614 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
615 if ( pvFault == (RTGCPTR)pRegFrame->eip
616 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
617# ifdef CSAM_DETECT_NEW_CODE_PAGES
618 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
619 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)pRegFrame->eip)) /* any new code we encounter here */
620# endif /* CSAM_DETECT_NEW_CODE_PAGES */
621 )
622 {
623 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
624 rc = CSAMExecFault(pVM, (RTGCPTR)pRegFrame->eip);
625 if (rc != VINF_SUCCESS)
626 {
627 /*
628 * CSAM needs to perform a job in ring 3.
629 *
630 * Sync the page before going to the host context; otherwise we'll end up in a loop if
631 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
632 */
633 LogFlow(("CSAM ring 3 job\n"));
634 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
635 AssertRC(rc2);
636
637 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
638 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
639 return rc;
640 }
641 }
642# ifdef CSAM_DETECT_NEW_CODE_PAGES
643 else
644 if ( uErr == X86_TRAP_PF_RW
645 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
646 && pRegFrame->ecx < 0x10000
647 )
648 {
649 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
650 * to detect loading of new code pages.
651 */
652
653 /*
654 * Decode the instruction.
655 */
656 RTGCPTR PC;
657 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
658 if (rc == VINF_SUCCESS)
659 {
660 DISCPUSTATE Cpu;
661 uint32_t cbOp;
662 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
663
664 /* For now we'll restrict this to rep movsw/d instructions */
665 if ( rc == VINF_SUCCESS
666 && Cpu.pCurInstr->opcode == OP_MOVSWD
667 && (Cpu.prefix & PREFIX_REP))
668 {
669 CSAMMarkPossibleCodePage(pVM, pvFault);
670 }
671 }
672 }
673# endif /* CSAM_DETECT_NEW_CODE_PAGES */
674
675 /*
676 * Mark this page as safe.
677 */
678 /** @todo not correct for pages that contain both code and data!! */
679 Log2(("CSAMMarkPage %p; scanned=%d\n", pvFault, true));
680 CSAMMarkPage(pVM, pvFault, true);
681 }
682 }
683# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0) */
684 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
685 if (VBOX_SUCCESS(rc))
686 {
687 /* The page was successfully synced, return to the guest. */
688 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
689 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
690 return VINF_SUCCESS;
691 }
692 }
693 else
694 {
695 /*
696 * A side effect of not flushing global PDEs is out-of-sync pages due
697 * to physically monitored regions that are no longer valid.
698 * Assume for now that it only applies to the read/write flag.
699 */
700 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
701 {
702 if (uErr & X86_TRAP_PF_US)
703 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
704 else /* supervisor */
705 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
706
707
708 /*
709 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
710 */
711 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
712 if (VBOX_SUCCESS(rc))
713 {
714 /*
715 * Page was successfully synced, return to guest.
716 */
717# ifdef VBOX_STRICT
718 RTGCPHYS GCPhys;
719 uint64_t fPageGst;
720 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
721 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
722 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
723
724 uint64_t fPageShw;
725 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
726 AssertMsg(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Vrc fPageShw=%VX64\n", rc, fPageShw));
727# endif /* VBOX_STRICT */
728 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
729 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
730 return VINF_SUCCESS;
731 }
732
733 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
734 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
735 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP|X86_CR0_PG)) == X86_CR0_PG)
736 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
737 {
738 uint64_t fPageGst;
739 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
740 if ( VBOX_SUCCESS(rc)
741 && !(fPageGst & X86_PTE_RW))
742 {
743 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
744 if (VBOX_SUCCESS(rc))
745 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulGC);
746 else
747 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulR3);
748 return rc;
749 }
750 else
751 AssertMsgFailed(("Unexpected r/w page %x flag=%x\n", pvFault, (uint32_t)fPageGst));
752 }
753
754 }
755
756# if PGM_WITH_PAGING(PGM_GST_TYPE)
757# ifdef VBOX_STRICT
758 /*
759 * Check for VMM page flags vs. Guest page flags consistency.
760 * Currently only for debug purposes.
761 */
762 if (VBOX_SUCCESS(rc))
763 {
764 /* Get guest page flags. */
765 uint64_t fPageGst;
766 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
767 if (VBOX_SUCCESS(rc))
768 {
769 uint64_t fPageShw;
770 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
771
772 /*
773 * Compare page flags.
774 * Note: we have AVL, A, D bits desynched.
775 */
776 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
777 ("Page flags mismatch! pvFault=%p GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
778 }
779 else
780 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
781 }
782 else
783 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
784# endif /* VBOX_STRICT */
785# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
786 }
787 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
788# endif /* PGM_OUT_OF_SYNC_IN_GC */
789 }
790 else
791 {
792 /*
793 * Page not present in Guest OS or invalid page table address.
794 * This is potential virtual page access handler food.
795 *
796 * For the present we'll say that our access handlers don't
797 * work for this case - we've already discarded the page table
798 * not present case which is identical to this.
799 *
800 * When we perchance find we need this, we will probably have AVL
801 * trees (offset based) to operate on and we can measure their speed
802 * against mapping a page table and probably rearrange this handling
803 * a bit. (Like, searching virtual ranges before checking the
804 * physical address.)
805 */
806 }
807 }
808
809
810# if PGM_WITH_PAGING(PGM_GST_TYPE)
811 /*
812 * Conclusion, this is a guest trap.
813 */
814 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
815 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
816 return VINF_EM_RAW_GUEST_TRAP;
817# else
818 /* present, but not a monitored page; perhaps the guest is probing physical memory */
819 return VINF_EM_RAW_EMULATE_INSTR;
820# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
821
822
823#else /* PGM_SHW_TYPE == PGM_TYPE_NESTED or unsupported guest/shadow combination */
824
825 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
826 return VERR_INTERNAL_ERROR;
827#endif /* PGM_SHW_TYPE == PGM_TYPE_NESTED or unsupported guest/shadow combination */
828}
829
830
831/**
832 * Emulation of the invlpg instruction.
833 *
834 *
835 * @returns VBox status code.
836 *
837 * @param pVM VM handle.
838 * @param GCPtrPage Page to invalidate.
839 *
840 * @remark ASSUMES that the guest updates the page table entry before invalidating it. This order
841 * isn't required by the CPU, so this is speculative and could cause
842 * trouble.
843 *
844 * @todo Flush page or page directory only if necessary!
845 * @todo Add a #define for simply invalidating the page.
846 */
847PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
848{
849#if PGM_WITH_PAGING(PGM_GST_TYPE) \
850 && PGM_SHW_TYPE != PGM_TYPE_NESTED
851 int rc;
852
853 LogFlow(("InvalidatePage %x\n", GCPtrPage));
854 /*
855 * Get the shadow PD entry and skip out if this PD isn't present.
856 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
857 */
858 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
859# if PGM_SHW_TYPE == PGM_TYPE_32BIT
860 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
861# elif PGM_SHW_TYPE == PGM_TYPE_PAE
862 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
863# else /* AMD64 */
864 /* PML4 */
865 const unsigned iPml4 = ((RTGCUINTPTR64)GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
866 PX86PML4E pPml4eDst = &CTXMID(pVM->pgm.s.p,PaePML4)->a[iPml4];
867 if (!pPml4eDst->n.u1Present)
868 {
869 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
870 return VINF_SUCCESS;
871 }
872
873 /* PDPT */
874 PX86PDPT pPDPT;
875 rc = PGM_HCPHYS_2_PTR(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, &pPDPT);
876 if (VBOX_FAILURE(rc))
877 return rc;
878 const unsigned iPDPT = ((RTGCUINTPTR64)GCPtrPage >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
879 PX86PDPE pPdpeDst = &pPDPT->a[iPDPT];
880 if (!pPdpeDst->n.u1Present)
881 {
882 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
883 return VINF_SUCCESS;
884 }
885
886 /* PD */
887 PX86PDPAE pPd;
888 rc = PGM_HCPHYS_2_PTR(pVM, pPdpeDst->u & X86_PDPE_PG_MASK, &pPd);
889 if (VBOX_FAILURE(rc))
890 return rc;
891 const unsigned iPd = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
892 PX86PDEPAE pPdeDst = &pPd->a[iPd];
893# endif
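 /* pPdeDst now points at the shadow PDE for GCPtrPage; if that PDE isn't present there is nothing to invalidate. */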
894
895 const SHWPDE PdeDst = *pPdeDst;
896 if (!PdeDst.n.u1Present)
897 {
898 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
899 return VINF_SUCCESS;
900 }
901
902 /*
903 * Get the guest PD entry and calc big page.
904 */
905# if PGM_GST_TYPE == PGM_TYPE_32BIT
906 PX86PD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
907 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
908 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
909# else
910 unsigned iPDSrc;
911# if PGM_GST_TYPE == PGM_TYPE_PAE
912 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
913# else /* AMD64 */
914 PX86PML4E pPml4eSrc;
915 X86PDPE PdpeSrc;
916 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
917# endif
918 GSTPDE PdeSrc;
919
920 if (pPDSrc)
921 PdeSrc = pPDSrc->a[iPDSrc];
922 else
923 PdeSrc.u = 0;
924# endif
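 /* PdeSrc holds the guest PDE for GCPtrPage (zero when the guest PD couldn't be mapped). */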
925
926 const uint32_t cr4 = CPUMGetGuestCR4(pVM);
927 const bool fIsBigPage = PdeSrc.b.u1Size && (cr4 & X86_CR4_PSE);
928
929# ifdef IN_RING3
930 /*
931 * If a CR3 Sync is pending we may ignore the invalidate page operation
932 * depending on the kind of sync and if it's a global page or not.
933 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
934 */
935# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
936 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
937 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
938 && fIsBigPage
939 && PdeSrc.b.u1Global
940 && (cr4 & X86_CR4_PGE)
941 )
942 )
943# else
944 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
945# endif
946 {
947 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
948 return VINF_SUCCESS;
949 }
950# endif /* IN_RING3 */
951
952
953# if PGM_GST_TYPE == PGM_TYPE_AMD64
954 Assert(pPml4eDst->n.u1Present && pPml4eDst->u & SHW_PDPT_MASK);
955 if (pPml4eSrc->n.u1Present)
956 {
957 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
958 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
959 {
960 /*
961 * Mark not present so we can resync the PML4E when it's used.
962 */
963 LogFlow(("InvalidatePage: Out-of-sync PML4E at %VGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
964 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
965 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
966 pPml4eDst->u = 0;
967 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
968 PGM_INVL_GUEST_TLBS();
969 }
970 else if (!pPml4eSrc->n.u1Accessed)
971 {
972 /*
973 * Mark not present so we can set the accessed bit.
974 */
975 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
976 pPml4eDst->u = 0;
977 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
978 PGM_INVL_GUEST_TLBS();
979 }
980 }
981 else
982 {
983 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
984 pPml4eDst->u = 0;
985 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
986 PGM_INVL_PG(GCPtrPage);
987 return VINF_SUCCESS;
988 }
989
990 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
991 if (PdpeSrc.n.u1Present)
992 {
993 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
994 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
995 {
996 /*
997 * Mark not present so we can resync the PDPE when it's used.
998 */
999 LogFlow(("InvalidatePage: Out-of-sync PDPE at %VGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1000 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1001 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PML4, iPml4);
1002 pPdpeDst->u = 0;
1003 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1004 PGM_INVL_GUEST_TLBS();
1005 }
1006 else if (!PdpeSrc.lm.u1Accessed)
1007 {
1008 /*
1009 * Mark not present so we can set the accessed bit.
1010 */
1011 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PML4, iPml4);
1012 pPdpeDst->u = 0;
1013 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1014 PGM_INVL_GUEST_TLBS();
1015 }
1016 }
1017 else
1018 {
1019 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PDPT, iPDDst);
1020 pPdpeDst->u = 0;
1021 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1022 PGM_INVL_PG(GCPtrPage);
1023 return VINF_SUCCESS;
1024 }
1025# endif
1026
1027 /*
1028 * Deal with the Guest PDE.
1029 */
1030 rc = VINF_SUCCESS;
1031 if (PdeSrc.n.u1Present)
1032 {
1033 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1034 {
1035 /*
1036 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1037 */
1038 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1039 Assert(PGMGetGuestMode(pVM) <= PGMMODE_32_BIT);
1040 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1041 }
1042 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1043 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1044 {
1045 /*
1046 * Mark not present so we can resync the PDE when it's used.
1047 */
1048 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1049 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1050 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1051 pPdeDst->u = 0;
1052 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1053 PGM_INVL_GUEST_TLBS();
1054 }
1055 else if (!PdeSrc.n.u1Accessed)
1056 {
1057 /*
1058 * Mark not present so we can set the accessed bit.
1059 */
1060 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1061 pPdeDst->u = 0;
1062 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1063 PGM_INVL_GUEST_TLBS();
1064 }
1065 else if (!fIsBigPage)
1066 {
1067 /*
1068 * 4KB - page.
1069 */
1070 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1071 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1072# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1073 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1074 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1075# endif
1076 if (pShwPage->GCPhys == GCPhys)
1077 {
1078# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1079 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1080 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1081 if (pPT->a[iPTEDst].n.u1Present)
1082 {
1083# ifdef PGMPOOL_WITH_USER_TRACKING
1084 /* This is very unlikely with caching/monitoring enabled. */
1085 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1086# endif
1087 pPT->a[iPTEDst].u = 0;
1088 }
1089# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1090 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1091 if (VBOX_SUCCESS(rc))
1092 rc = VINF_SUCCESS;
1093# endif
1094 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
1095 PGM_INVL_PG(GCPtrPage);
1096 }
1097 else
1098 {
1099 /*
1100 * The page table address changed.
1101 */
1102 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
1103 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1104 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1105 pPdeDst->u = 0;
1106 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1107 PGM_INVL_GUEST_TLBS();
1108 }
1109 }
1110 else
1111 {
1112 /*
1113 * 2/4MB - page.
1114 */
1115 /* Before freeing the page, check if anything really changed. */
1116 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1117 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1118# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1119 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1120 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1121# endif
1122 if ( pShwPage->GCPhys == GCPhys
1123 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1124 {
1125 /* ASSUMES that the given bits are identical for 4M and normal PDEs */
1126 /** @todo PAT */
1127 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1128 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1129 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1130 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1131 {
1132 LogFlow(("Skipping flush for big page containing %VGv (PD=%X .u=%VX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1133 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
1134 return VINF_SUCCESS;
1135 }
1136 }
1137
1138 /*
1139 * Ok, the page table is present and it's been changed in the guest.
1140 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1141 * We could do this for some flushes in GC too, but we need an algorithm for
1142 * deciding which 4MB pages contain code likely to be executed very soon.
1143 */
1144 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1145 pPdeDst->u = 0;
1146 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
1147 PGM_INVL_BIG_PG(GCPtrPage);
1148 }
1149 }
1150 else
1151 {
1152 /*
1153 * Page directory is not present, mark shadow PDE not present.
1154 */
1155 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1156 {
1157 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1158 pPdeDst->u = 0;
1159 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1160 PGM_INVL_PG(GCPtrPage);
1161 }
1162 else
1163 {
1164 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1165 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
1166 }
1167 }
1168
1169 return rc;
1170
1171#else /* guest real and protected mode */
1172 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1173 return VINF_SUCCESS;
1174#endif
1175}
1176
1177
1178#ifdef PGMPOOL_WITH_USER_TRACKING
1179/**
1180 * Update the tracking of shadowed pages.
1181 *
1182 * @param pVM The VM handle.
1183 * @param pShwPage The shadow page.
1184 * @param HCPhys The physical page that is being dereferenced.
1185 */
1186DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1187{
1188# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1189 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1190 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1191
1192 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1193 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1194 * 2. write protect all shadowed pages. I.e. implement caching.
1195 */
1196 /*
1197 * Find the guest address.
1198 */
1199 for (PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
1200 pRam;
1201 pRam = CTXALLSUFF(pRam->pNext))
1202 {
1203 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1204 while (iPage-- > 0)
1205 {
1206 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1207 {
1208 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1209 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1210 pShwPage->cPresent--;
1211 pPool->cPresent--;
1212 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1213 return;
1214 }
1215 }
1216 }
1217
1218 for (;;)
1219 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1220# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1221 pShwPage->cPresent--;
1222 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1223# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1224}
1225
1226
1227/**
1228 * Update the tracking of shadowed pages.
1229 *
1230 * @param pVM The VM handle.
1231 * @param pShwPage The shadow page.
1232 * @param u16 The top 16 bits of pPage->HCPhys.
1233 * @param pPage Pointer to the guest page. This will be modified.
1234 * @param iPTDst The index into the shadow table.
1235 */
1236DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1237{
1238# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1239 /*
1240 * We're making certain assumptions about the placement of cRef and idx.
1241 */
1242 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1243 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1244
1245 /*
1246 * Just deal with the simple first time here.
1247 */
1248 if (!u16)
1249 {
1250 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1251 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1252 }
1253 else
1254 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1255
1256 /* write back, trying to be clever... */
1257 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%VHp->%VHp iPTDst=%#x\n",
1258 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1259 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1260# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1261
1262 /* update statistics. */
1263 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1264 pShwPage->cPresent++;
1265 if (pShwPage->iFirstPresent > iPTDst)
1266 pShwPage->iFirstPresent = iPTDst;
1267}
1268#endif /* PGMPOOL_WITH_USER_TRACKING */
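/* Together the two helpers above keep the shadow page's cPresent count and the guest page's back-reference (the 16-bit cRefs/idx value stored in the top of PGMPAGE::HCPhys) in step with the shadow PTEs written by SyncPageWorker below. */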
1269
1270
1271/**
1272 * Creates a 4K shadow page for a guest page.
1273 *
1274 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1275 * physical address. Of the PdeSrc argument only the flags are used. No page structure
1276 * will be mapped in this function.
1277 *
1278 * @param pVM VM handle.
1279 * @param pPteDst Destination page table entry.
1280 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1281 * Can safely assume that only the flags are being used.
1282 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1283 * @param pShwPage Pointer to the shadow page.
1284 * @param iPTDst The index into the shadow table.
1285 *
1286 * @remark Not used for 2/4MB pages!
1287 */
1288DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1289{
1290 if (PteSrc.n.u1Present)
1291 {
1292 /*
1293 * Find the ram range.
1294 */
1295 PPGMPAGE pPage;
1296 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1297 if (VBOX_SUCCESS(rc))
1298 {
1299 /** @todo investigate PWT, PCD and PAT. */
1300 /*
1301 * Make page table entry.
1302 */
1303 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1304 SHWPTE PteDst;
1305 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1306 {
1307 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1308 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1309 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1310 | (HCPhys & X86_PTE_PAE_PG_MASK);
1311 else
1312 {
1313 LogFlow(("SyncPageWorker: monitored page (%VGp) -> mark not present\n", HCPhys));
1314 PteDst.u = 0;
1315 }
1316 /** @todo count these two kinds. */
1317 }
1318 else
1319 {
1320 /*
1321 * If the page or page directory entry is not marked accessed,
1322 * we mark the page not present.
1323 */
1324 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1325 {
1326 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1327 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1328 PteDst.u = 0;
1329 }
1330 else
1331 /*
1332 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1333 * when the page is modified.
1334 */
1335 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1336 {
1337 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1338 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1339 | (HCPhys & X86_PTE_PAE_PG_MASK)
1340 | PGM_PTFLAGS_TRACK_DIRTY;
1341 }
1342 else
1343 {
1344 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1345 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1346 | (HCPhys & X86_PTE_PAE_PG_MASK);
1347 }
1348 }
1349
1350#ifdef PGMPOOL_WITH_USER_TRACKING
1351 /*
1352 * Keep user track up to date.
1353 */
1354 if (PteDst.n.u1Present)
1355 {
1356 if (!pPteDst->n.u1Present)
1357 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1358 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1359 {
1360 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1361 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1362 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1363 }
1364 }
1365 else if (pPteDst->n.u1Present)
1366 {
1367 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1368 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1369 }
1370#endif /* PGMPOOL_WITH_USER_TRACKING */
1371
1372 /*
1373 * Update statistics and commit the entry.
1374 */
1375 if (!PteSrc.n.u1Global)
1376 pShwPage->fSeenNonGlobal = true;
1377 *pPteDst = PteDst;
1378 }
1379 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1380 /** @todo count these. */
1381 }
1382 else
1383 {
1384 /*
1385 * Page not-present.
1386 */
1387 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1388#ifdef PGMPOOL_WITH_USER_TRACKING
1389 /* Keep user track up to date. */
1390 if (pPteDst->n.u1Present)
1391 {
1392 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1393 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1394 }
1395#endif /* PGMPOOL_WITH_USER_TRACKING */
1396 pPteDst->u = 0;
1397 /** @todo count these. */
1398 }
1399}
1400
1401
1402/**
1403 * Syncs a guest OS page.
1404 *
1405 * There are no conflicts at this point, neither is there any need for
1406 * page table allocations.
1407 *
1408 * @returns VBox status code.
1409 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1410 * @param pVM VM handle.
1411 * @param PdeSrc Page directory entry of the guest.
1412 * @param GCPtrPage Guest context page address.
1413 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1414 * @param uErr Fault error (X86_TRAP_PF_*).
1415 */
1416PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1417{
1418 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1419
1420#if (PGM_GST_TYPE == PGM_TYPE_32BIT \
1421 || PGM_GST_TYPE == PGM_TYPE_PAE) \
1422 && PGM_SHW_TYPE != PGM_TYPE_NESTED
1423
1424# if PGM_WITH_NX(PGM_GST_TYPE)
1425 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1426# endif
1427
1428 /*
1429 * Assert preconditions.
1430 */
1431 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1432 Assert(PdeSrc.n.u1Present);
1433 Assert(cPages);
1434
1435 /*
1436 * Get the shadow PDE, find the shadow page table in the pool.
1437 */
1438 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1439# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1440 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1441# else /* PAE */
1442 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1443# endif
1444 Assert(PdeDst.n.u1Present);
1445 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1446
1447 /*
1448 * Check that the page is present and that the shadow PDE isn't out of sync.
1449 */
1450 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1451 RTGCPHYS GCPhys;
1452 if (!fBigPage)
1453 {
1454 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1455# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1456 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1457 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1458# endif
1459 }
1460 else
1461 {
1462 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1463# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1464 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1465 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1466# endif
1467 }
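 /* GCPhys is the guest physical address this shadow page table is expected to map; a mismatch with pShwPage->GCPhys below means the shadow PDE is out of sync. */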
1468 if ( pShwPage->GCPhys == GCPhys
1469 && PdeSrc.n.u1Present
1470 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1471 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1472# if PGM_WITH_NX(PGM_GST_TYPE)
1473 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1474# endif
1475 )
1476 {
1477 /*
1478 * Check that the PDE is marked accessed already.
1479 * Since we set the accessed bit *before* getting here on a #PF, this
1480 * check is only meant for dealing with non-#PF'ing paths.
1481 */
1482 if (PdeSrc.n.u1Accessed)
1483 {
1484 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1485 if (!fBigPage)
1486 {
1487 /*
1488 * 4KB Page - Map the guest page table.
1489 */
1490 PGSTPT pPTSrc;
1491 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1492 if (VBOX_SUCCESS(rc))
1493 {
1494# ifdef PGM_SYNC_N_PAGES
1495 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1496 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1497 {
1498 /*
1499 * This code path is currently only taken when the caller is PGMTrap0eHandler
1500 * for non-present pages!
1501 *
1502 * We sync PGM_SYNC_NR_PAGES pages around the faulting page to
1503 * deal with locality.
1504 */
1505 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1506# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1507 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1508 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1509# else
1510 const unsigned offPTSrc = 0;
1511# endif
1512 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1513 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1514 iPTDst = 0;
1515 else
1516 iPTDst -= PGM_SYNC_NR_PAGES / 2;
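                    /* Illustrative example, assuming PGM_SYNC_NR_PAGES is 8 (its usual value):
                       a fault on PT index 100 gives the window [96,104), i.e. entries 96..103 are
                       synced; a fault on index 2 clips the start to 0 and syncs entries 0..5. */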
1517 for (; iPTDst < iPTDstEnd; iPTDst++)
1518 {
1519 if (!pPTDst->a[iPTDst].n.u1Present)
1520 {
1521 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1522 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1523 NOREF(GCPtrCurPage);
1524#ifndef IN_RING0
1525 /*
1526 * Assume kernel code is marked as supervisor (not as user-level code executed
1527 * through a conforming code selector) and as read-only.
1528 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1529 */
1530 PPGMPAGE pPage;
1531 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1532 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1533 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)GCPtrCurPage)
1534 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1535 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1536 )
1537#endif /* else: CSAM not active */
1538 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1539 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1540 GCPtrCurPage, PteSrc.n.u1Present,
1541 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1542 PteSrc.n.u1User & PdeSrc.n.u1User,
1543 (uint64_t)PteSrc.u,
1544 (uint64_t)pPTDst->a[iPTDst].u,
1545 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1546 }
1547 }
1548 }
1549 else
1550# endif /* PGM_SYNC_N_PAGES */
1551 {
1552 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1553 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1554 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1555 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1556 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1557 GCPtrPage, PteSrc.n.u1Present,
1558 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1559 PteSrc.n.u1User & PdeSrc.n.u1User,
1560 (uint64_t)PteSrc.u,
1561 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1562 }
1563 }
1564 else /* MMIO or invalid page: emulated in #PF handler. */
1565 {
1566 LogFlow(("PGM_GCPHYS_2_PTR %VGp failed with %Vrc\n", GCPhys, rc));
1567 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1568 }
1569 }
1570 else
1571 {
1572 /*
1573 * 4/2MB page - lazy syncing shadow 4K pages.
1574 * (There are many causes of getting here, it's no longer only CSAM.)
1575 */
1576 /* Calculate the GC physical address of this 4KB shadow page. */
1577 RTGCPHYS GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1578 /* Find ram range. */
1579 PPGMPAGE pPage;
1580 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1581 if (VBOX_SUCCESS(rc))
1582 {
1583 /*
1584 * Make shadow PTE entry.
1585 */
1586 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1587 SHWPTE PteDst;
1588 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1589 | (HCPhys & X86_PTE_PAE_PG_MASK);
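                /* The shadow PTE inherits P/RW/US/A/D/G from the guest big-page PDE; the address,
                   AVL and cache-control bits are replaced, and masking X86_PTE_PAT (bit 7) also
                   clears what is the PS bit in the source PDE. */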
1590 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1591 {
1592 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1593 PteDst.n.u1Write = 0;
1594 else
1595 PteDst.u = 0;
1596 }
1597 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1598# ifdef PGMPOOL_WITH_USER_TRACKING
1599 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1600 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1601# endif
1602 pPTDst->a[iPTDst] = PteDst;
1603
1604
1605 /*
1606 * If the page is not flagged as dirty and is writable, then make it read-only
1607 * at PD level, so we can set the dirty bit when the page is modified.
1608 *
1609 * ASSUMES that page access handlers are implemented on page table entry level.
1610 * Thus we will first catch the dirty access and set PDE.D and restart. If
1611 * there is an access handler, we'll trap again and let it work on the problem.
1612 */
1613 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1614 * As for invlpg, it simply frees the whole shadow PT.
1615 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
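                /* When the guest then writes to the page, CheckPageFault (below) sets the guest
                   PDE's dirty bit, restores write access on the shadow PDE and clears
                   PGM_PDFLAGS_TRACK_DIRTY again. */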
1616 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1617 {
1618 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1619 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1620 PdeDst.n.u1Write = 0;
1621 }
1622 else
1623 {
1624 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1625 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1626 }
1627# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1628 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1629# else /* PAE */
1630 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1631# endif
1632 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1633 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1634 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1635 }
1636 else
1637 LogFlow(("pgmPhysGetPageEx %VGp (big) failed with %Vrc\n", GCPhys, rc));
1638 }
1639 return VINF_SUCCESS;
1640 }
1641 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1642 }
1643 else
1644 {
1645 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1646 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1647 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1648 }
1649
1650 /*
1651 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1652 * Yea, I'm lazy.
1653 */
1654 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1655# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1656 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1657# else /* PAE */
1658 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1659# endif
1660 PGM_INVL_GUEST_TLBS();
1661 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1662
1663#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1664 && PGM_SHW_TYPE != PGM_TYPE_NESTED
1665
1666# ifdef PGM_SYNC_N_PAGES
1667 /*
1668 * Get the shadow PDE, find the shadow page table in the pool.
1669 */
1670 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1671# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1672 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1673# else /* PAE */
1674 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1675# endif
1676 Assert(PdeDst.n.u1Present);
1677 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1678 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1679
1680# if PGM_SHW_TYPE == PGM_TYPE_PAE
1681 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1682 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1683# else
1684 const unsigned offPTSrc = 0;
1685# endif
1686
1687 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1688 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1689 {
1690 /*
1691 * This code path is currently only taken when the caller is PGMTrap0eHandler
1692 * for non-present pages!
1693 *
1694 * We sync PGM_SYNC_NR_PAGES pages around the faulting page to resolve the
1695 * fault and exploit locality.
1696 */
1697 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1698 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1699 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1700 iPTDst = 0;
1701 else
1702 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1703 for (; iPTDst < iPTDstEnd; iPTDst++)
1704 {
1705 if (!pPTDst->a[iPTDst].n.u1Present)
1706 {
1707 GSTPTE PteSrc;
1708
1709 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1710
1711 /* Fake the page table entry */
1712 PteSrc.u = GCPtrCurPage;
1713 PteSrc.n.u1Present = 1;
1714 PteSrc.n.u1Dirty = 1;
1715 PteSrc.n.u1Accessed = 1;
1716 PteSrc.n.u1Write = 1;
1717 PteSrc.n.u1User = 1;
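                /* Real/protected mode without guest paging has no guest page table to read, so the
                   guest PTE is synthesized: identity mapping (guest physical == guest virtual),
                   writable, user accessible, with A/D preset so no dirty/accessed tracking is needed. */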
1718
1719 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1720
1721 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1722 GCPtrCurPage, PteSrc.n.u1Present,
1723 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1724 PteSrc.n.u1User & PdeSrc.n.u1User,
1725 (uint64_t)PteSrc.u,
1726 (uint64_t)pPTDst->a[iPTDst].u,
1727 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1728 }
1729 }
1730 }
1731 else
1732# endif /* PGM_SYNC_N_PAGES */
1733 {
1734 GSTPTE PteSrc;
1735 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1736 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1737
1738 /* Fake the page table entry */
1739 PteSrc.u = GCPtrCurPage;
1740 PteSrc.n.u1Present = 1;
1741 PteSrc.n.u1Dirty = 1;
1742 PteSrc.n.u1Accessed = 1;
1743 PteSrc.n.u1Write = 1;
1744 PteSrc.n.u1User = 1;
1745 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1746
1747 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
1748 GCPtrPage, PteSrc.n.u1Present,
1749 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1750 PteSrc.n.u1User & PdeSrc.n.u1User,
1751 (uint64_t)PteSrc.u,
1752 (uint64_t)pPTDst->a[iPTDst].u,
1753 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1754 }
1755 return VINF_SUCCESS;
1756
1757#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1758 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1759 return VERR_INTERNAL_ERROR;
1760#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1761}
1762
1763
1764
1765#if PGM_WITH_PAGING(PGM_GST_TYPE)
1766
1767/**
1768 * Investigate page fault and handle write protection page faults caused by
1769 * dirty bit tracking.
1770 *
1771 * @returns VBox status code.
1772 * @param pVM VM handle.
1773 * @param uErr Page fault error code.
1774 * @param pPdeDst Shadow page directory entry.
1775 * @param pPdeSrc Guest page directory entry.
1776 * @param GCPtrPage Guest context page address.
1777 */
1778PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1779{
1780 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
1781 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
1782 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
1783 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1784# if PGM_WITH_NX(PGM_GST_TYPE)
1785 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1786# endif
1787 unsigned uPageFaultLevel;
1788 int rc;
1789
1790 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1791 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1792
1793# if PGM_GST_TYPE == PGM_TYPE_PAE \
1794 || PGM_GST_TYPE == PGM_TYPE_AMD64
1795
1796# if PGM_GST_TYPE == PGM_TYPE_AMD64
1797 PX86PML4E pPml4eSrc;
1798 PX86PDPE pPdpeSrc;
1799
1800 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
1801 Assert(pPml4eSrc);
1802
1803 /*
1804 * Real page fault? (PML4E level)
1805 */
1806 if ( (uErr & X86_TRAP_PF_RSVD)
1807 || !pPml4eSrc->n.u1Present
1808 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
1809 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1810 || (fUserLevelFault && !pPml4eSrc->n.u1User)
1811 )
1812 {
1813 uPageFaultLevel = 0;
1814 goto UpperLevelPageFault;
1815 }
1816 Assert(pPdpeSrc);
1817
1818# else /* PAE */
1819 PX86PDPE pPdpeSrc = &pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtrPage >> GST_PDPT_SHIFT) & GST_PDPT_MASK];
1820# endif
1821
1822 /*
1823 * Real page fault? (PDPE level)
1824 */
1825 if ( (uErr & X86_TRAP_PF_RSVD)
1826 || !pPdpeSrc->n.u1Present
1827# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
1828 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
1829 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
1830 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
1831# endif
1832 )
1833 {
1834 uPageFaultLevel = 1;
1835 goto UpperLevelPageFault;
1836 }
1837# endif
1838
1839 /*
1840 * Real page fault? (PDE level)
1841 */
1842 if ( (uErr & X86_TRAP_PF_RSVD)
1843 || !pPdeSrc->n.u1Present
1844# if PGM_WITH_NX(PGM_GST_TYPE)
1845 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
1846# endif
1847 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1848 || (fUserLevelFault && !pPdeSrc->n.u1User) )
1849 {
1850 uPageFaultLevel = 2;
1851 goto UpperLevelPageFault;
1852 }
1853
1854 /*
1855 * First check the easy case where the page directory has been marked read-only to track
1856 * the dirty bit of an emulated BIG page
1857 */
1858 if (pPdeSrc->b.u1Size && fBigPagesSupported)
1859 {
1860 /* Mark guest page directory as accessed */
1861# if PGM_GST_TYPE == PGM_TYPE_AMD64
1862 pPml4eSrc->n.u1Accessed = 1;
1863 pPdpeSrc->lm.u1Accessed = 1;
1864# endif
1865 pPdeSrc->b.u1Accessed = 1;
1866
1867 /*
1868 * Only write protection page faults are relevant here.
1869 */
1870 if (fWriteFault)
1871 {
1872 /* Mark guest page directory as dirty (BIG page only). */
1873 pPdeSrc->b.u1Dirty = 1;
1874
1875 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1876 {
1877 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1878
1879 Assert(pPdeSrc->b.u1Write);
1880
1881 pPdeDst->n.u1Write = 1;
1882 pPdeDst->n.u1Accessed = 1;
1883 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1884 PGM_INVL_BIG_PG(GCPtrPage);
1885 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1886 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1887 }
1888 }
1889 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1890 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1891 }
1892 /* else: 4KB page table */
1893
1894 /*
1895 * Map the guest page table.
1896 */
1897 PGSTPT pPTSrc;
1898 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
1899 if (VBOX_SUCCESS(rc))
1900 {
1901 /*
1902 * Real page fault?
1903 */
1904 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
1905 const GSTPTE PteSrc = *pPteSrc;
1906 if ( !PteSrc.n.u1Present
1907# if PGM_WITH_NX(PGM_GST_TYPE)
1908 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
1909# endif
1910 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
1911 || (fUserLevelFault && !PteSrc.n.u1User)
1912 )
1913 {
1914# ifdef IN_GC
1915 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1916# endif
1917 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1918 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
1919
1920 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1921 * See the 2nd case above as well.
1922 */
1923 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
1924 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1925
1926 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1927 return VINF_EM_RAW_GUEST_TRAP;
1928 }
1929 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
1930
1931 /*
1932 * Set the accessed bits in the page directory and the page table.
1933 */
1934# if PGM_GST_TYPE == PGM_TYPE_AMD64
1935 pPml4eSrc->n.u1Accessed = 1;
1936 pPdpeSrc->lm.u1Accessed = 1;
1937# endif
1938 pPdeSrc->n.u1Accessed = 1;
1939 pPteSrc->n.u1Accessed = 1;
1940
1941 /*
1942 * Only write protection page faults are relevant here.
1943 */
1944 if (fWriteFault)
1945 {
1946 /* Write access, so mark guest entry as dirty. */
1947# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
1948 if (!pPteSrc->n.u1Dirty)
1949 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
1950 else
1951 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
1952# endif
1953
1954 pPteSrc->n.u1Dirty = 1;
1955
1956 if (pPdeDst->n.u1Present)
1957 {
1958 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
1959 * Our individual shadow handlers will provide more information and force a fatal exit.
1960 */
1961 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
1962 {
1963 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
1964 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1965 return VINF_SUCCESS;
1966 }
1967
1968 /*
1969 * Map shadow page table.
1970 */
1971 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1972 if (pShwPage)
1973 {
1974 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1975 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1976 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
1977 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
1978 {
1979 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
1980# ifdef VBOX_STRICT
1981 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
1982 if (pPage)
1983 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
1984 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
1985# endif
1986 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1987
1988 Assert(pPteSrc->n.u1Write);
1989
1990 pPteDst->n.u1Write = 1;
1991 pPteDst->n.u1Dirty = 1;
1992 pPteDst->n.u1Accessed = 1;
1993 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
1994 PGM_INVL_PG(GCPtrPage);
1995
1996 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1997 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1998 }
1999 }
2000 else
2001 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2002 }
2003 }
2004/** @todo Optimize accessed bit emulation? */
2005# ifdef VBOX_STRICT
2006 /*
2007 * Sanity check.
2008 */
2009 else if ( !pPteSrc->n.u1Dirty
2010 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2011 && pPdeDst->n.u1Present)
2012 {
2013 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2014 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2015 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2016 if ( pPteDst->n.u1Present
2017 && pPteDst->n.u1Write)
2018 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2019 }
2020# endif /* VBOX_STRICT */
2021 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2022 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2023 }
2024 AssertRC(rc);
2025 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2026 return rc;
2027
2028
2029UpperLevelPageFault:
2030 /* Page fault detected while checking the PML4E, PDPE or PDE.
2031 * Single exit handler to get rid of duplicate code paths.
2032 */
2033# ifdef IN_GC
2034 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
2035# endif
2036 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
2037 LogFlow(("CheckPageFault: real page fault at %VGv (%d)\n", GCPtrPage, uPageFaultLevel));
2038
2039 if (
2040# if PGM_GST_TYPE == PGM_TYPE_AMD64
2041 pPml4eSrc->n.u1Present &&
2042# endif
2043# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2044 pPdpeSrc->n.u1Present &&
2045# endif
2046 pPdeSrc->n.u1Present)
2047 {
2048 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2049 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2050 {
2051 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2052 }
2053 else
2054 {
2055 /*
2056 * Map the guest page table.
2057 */
2058 PGSTPT pPTSrc;
2059 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2060 if (VBOX_SUCCESS(rc))
2061 {
2062 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2063 const GSTPTE PteSrc = *pPteSrc;
2064 if (pPteSrc->n.u1Present)
2065 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2066 }
2067 AssertRC(rc);
2068 }
2069 }
2070 return VINF_EM_RAW_GUEST_TRAP;
2071}
2072
2073#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2074
2075
2076/**
2077 * Sync a shadow page table.
2078 *
2079 * The shadow page table is not present. This includes the case where
2080 * there is a conflict with a mapping.
2081 *
2082 * @returns VBox status code.
2083 * @param pVM VM handle.
2084 * @param iPDSrc Guest page directory index.
2085 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2086 * Assume this is a temporary mapping.
2087 * @param GCPtrPage GC Pointer of the page that caused the fault
2088 */
2089PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage)
2090{
2091 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2092 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
2093 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
2094
2095#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2096 || PGM_GST_TYPE == PGM_TYPE_PAE) \
2097 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2098
2099 /*
2100 * Validate input a little bit.
2101 */
2102 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%VGv\n", iPDSrc, GCPtrPage));
2103# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2104 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2105# else
2106 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2107# endif
2108 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2109 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2110 SHWPDE PdeDst = *pPdeDst;
2111
2112# ifndef PGM_WITHOUT_MAPPINGS
2113 /*
2114 * Check for conflicts.
2115 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2116 * HC: Simply resolve the conflict.
2117 */
2118 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2119 {
2120 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2121# ifndef IN_RING3
2122 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
2123 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2124 return VERR_ADDRESS_CONFLICT;
2125# else
2126 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2127 Assert(pMapping);
2128# if PGM_GST_TYPE == PGM_TYPE_32BIT
2129 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2130# elif PGM_GST_TYPE == PGM_TYPE_PAE
2131 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2132# endif
2133 if (VBOX_FAILURE(rc))
2134 {
2135 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2136 return rc;
2137 }
2138 PdeDst = *pPdeDst;
2139# endif
2140 }
2141# else /* PGM_WITHOUT_MAPPINGS */
2142 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2143# endif /* PGM_WITHOUT_MAPPINGS */
2144 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2145
2146 /*
2147 * Sync page directory entry.
2148 */
2149 int rc = VINF_SUCCESS;
2150 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2151 if (PdeSrc.n.u1Present)
2152 {
2153 /*
2154 * Allocate & map the page table.
2155 */
2156 PSHWPT pPTDst;
2157 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2158 PPGMPOOLPAGE pShwPage;
2159 RTGCPHYS GCPhys;
2160 if (fPageTable)
2161 {
2162 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2163# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2164 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2165 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2166# endif
2167 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2168 }
2169 else
2170 {
2171 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2172# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2173 /* Select the right PDE as we're emulating a guest 4 MB page with two 2 MB shadow PDEs. */
2174 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2175# endif
2176 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2177 }
2178 if (rc == VINF_SUCCESS)
2179 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2180 else if (rc == VINF_PGM_CACHED_PAGE)
2181 {
2182 /*
2183 * The PT was cached, just hook it up.
2184 */
2185 if (fPageTable)
2186 PdeDst.u = pShwPage->Core.Key
2187 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2188 else
2189 {
2190 PdeDst.u = pShwPage->Core.Key
2191 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2192 /* (see explanation and assumptions further down.) */
2193 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2194 {
2195 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2196 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2197 PdeDst.b.u1Write = 0;
2198 }
2199 }
2200 *pPdeDst = PdeDst;
2201 return VINF_SUCCESS;
2202 }
2203 else if (rc == VERR_PGM_POOL_FLUSHED)
2204 return VINF_PGM_SYNC_CR3;
2205 else
2206 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2207 PdeDst.u &= X86_PDE_AVL_MASK;
2208 PdeDst.u |= pShwPage->Core.Key;
2209
2210 /*
2211 * Page directory has been accessed (this is a fault situation, remember).
2212 */
2213 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2214 if (fPageTable)
2215 {
2216 /*
2217 * Page table - 4KB.
2218 *
2219 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2220 */
2221 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2222 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2223 PGSTPT pPTSrc;
2224 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2225 if (VBOX_SUCCESS(rc))
2226 {
2227 /*
2228 * Start by syncing the page directory entry so CSAM's TLB trick works.
2229 */
2230 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2231 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2232 *pPdeDst = PdeDst;
2233
2234 /*
2235 * Directory/page user or supervisor privilege: (same goes for read/write)
2236 *
2237 * Directory    Page    Combined
2238 * U/S          U/S     U/S
2239 *  0            0       0
2240 *  0            1       0
2241 *  1            0       0
2242 *  1            1       1
2243 *
2244 * Simple AND operation. Table listed for completeness.
2245 *
2246 */
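            /* (The AND itself is applied by SyncPageWorker, which is passed both PdeSrc and PteSrc.) */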
2247 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
2248# ifdef PGM_SYNC_N_PAGES
2249 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2250 unsigned iPTDst = iPTBase;
2251 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
2252 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2253 iPTDst = 0;
2254 else
2255 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2256# else /* !PGM_SYNC_N_PAGES */
2257 unsigned iPTDst = 0;
2258 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2259# endif /* !PGM_SYNC_N_PAGES */
2260# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2261 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2262 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2263# else
2264 const unsigned offPTSrc = 0;
2265# endif
2266 for (; iPTDst < iPTDstEnd; iPTDst++)
2267 {
2268 const unsigned iPTSrc = iPTDst + offPTSrc;
2269 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2270
2271 if (PteSrc.n.u1Present) /* we've already cleared it above */
2272 {
2273# ifndef IN_RING0
2274 /*
2275 * Assume kernel code is marked as supervisor (not as user-level code executed
2276 * through a conforming code selector) and as read-only.
2277 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2278 */
2279 PPGMPAGE pPage;
2280 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2281 || !CSAMDoesPageNeedScanning(pVM, (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2282 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2283 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2284 )
2285# endif
2286 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2287 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2288 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2289 PteSrc.n.u1Present,
2290 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2291 PteSrc.n.u1User & PdeSrc.n.u1User,
2292 (uint64_t)PteSrc.u,
2293 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2294 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2295 }
2296 } /* for PTEs */
2297 }
2298 }
2299 else
2300 {
2301 /*
2302 * Big page - 2/4MB.
2303 *
2304 * We'll walk the ram range list in parallel and optimize lookups.
2305 * We will only sync one shadow page table at a time.
2306 */
2307 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2308
2309 /**
2310 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2311 */
2312
2313 /*
2314 * Start by syncing the page directory entry.
2315 */
2316 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2317 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2318
2319 /*
2320 * If the page is not flagged as dirty and is writable, then make it read-only
2321 * at PD level, so we can set the dirty bit when the page is modified.
2322 *
2323 * ASSUMES that page access handlers are implemented on page table entry level.
2324 * Thus we will first catch the dirty access and set PDE.D and restart. If
2325 * there is an access handler, we'll trap again and let it work on the problem.
2326 */
2327 /** @todo move the above stuff to a section in the PGM documentation. */
2328 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2329 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2330 {
2331 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2332 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2333 PdeDst.b.u1Write = 0;
2334 }
2335 *pPdeDst = PdeDst;
2336
2337 /*
2338 * Fill the shadow page table.
2339 */
2340 /* Get address and flags from the source PDE. */
2341 SHWPTE PteDstBase;
2342 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
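            /* PteDstBase carries the P/RW/US/A/D/G bits of the guest big-page PDE into every 4 KB
               shadow PTE below; each PTE then gets its own host physical address OR'ed in from
               PGM_PAGE_GET_HCPHYS(pPage). */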
2343
2344 /* Loop thru the entries in the shadow PT. */
2345 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2346 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2347 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2348 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2349 PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
2350 unsigned iPTDst = 0;
2351 while (iPTDst < ELEMENTS(pPTDst->a))
2352 {
2353 /* Advance ram range list. */
2354 while (pRam && GCPhys > pRam->GCPhysLast)
2355 pRam = CTXALLSUFF(pRam->pNext);
2356 if (pRam && GCPhys >= pRam->GCPhys)
2357 {
2358 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2359 do
2360 {
2361 /* Make shadow PTE. */
2362 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2363 SHWPTE PteDst;
2364
2365 /* Make sure the RAM has already been allocated. */
2366 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2367 {
2368 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2369 {
2370# ifdef IN_RING3
2371 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2372# else
2373 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2374# endif
2375 if (rc != VINF_SUCCESS)
2376 return rc;
2377 }
2378 }
2379
2380 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2381 {
2382 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2383 {
2384 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2385 PteDst.n.u1Write = 0;
2386 }
2387 else
2388 PteDst.u = 0;
2389 }
2390# ifndef IN_RING0
2391 /*
2392 * Assume kernel code is marked as supervisor (not as user-level code executed
2393 * through a conforming code selector). Don't require read-only here, as that would imply the
2394 * whole 4MB is code or read-only data; Linux enables write access for its large pages.
2395 */
2396 else if ( !PdeSrc.n.u1User
2397 && CSAMDoesPageNeedScanning(pVM, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2398 PteDst.u = 0;
2399# endif
2400 else
2401 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2402# ifdef PGMPOOL_WITH_USER_TRACKING
2403 if (PteDst.n.u1Present)
2404 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2405# endif
2406 /* commit it */
2407 pPTDst->a[iPTDst] = PteDst;
2408 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2409 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2410 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2411
2412 /* advance */
2413 GCPhys += PAGE_SIZE;
2414 iHCPage++;
2415 iPTDst++;
2416 } while ( iPTDst < ELEMENTS(pPTDst->a)
2417 && GCPhys <= pRam->GCPhysLast);
2418 }
2419 else if (pRam)
2420 {
2421 Log(("Invalid pages at %VGp\n", GCPhys));
2422 do
2423 {
2424 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2425 GCPhys += PAGE_SIZE;
2426 iPTDst++;
2427 } while ( iPTDst < ELEMENTS(pPTDst->a)
2428 && GCPhys < pRam->GCPhys);
2429 }
2430 else
2431 {
2432 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2433 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2434 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2435 }
2436 } /* while more PTEs */
2437 } /* 4KB / 4MB */
2438 }
2439 else
2440 AssertRelease(!PdeDst.n.u1Present);
2441
2442 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2443# ifdef IN_GC
2444 if (VBOX_FAILURE(rc))
2445 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2446# endif
2447 return rc;
2448
2449#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2450 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2451
2452 int rc = VINF_SUCCESS;
2453
2454 /*
2455 * Validate input a little bit.
2456 */
2457# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2458 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2459# else
2460 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2461# endif
2462 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2463 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2464 SHWPDE PdeDst = *pPdeDst;
2465
2466 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2467 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2468
2469 GSTPDE PdeSrc;
2470 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2471 PdeSrc.n.u1Present = 1;
2472 PdeSrc.n.u1Write = 1;
2473 PdeSrc.n.u1Accessed = 1;
2474 PdeSrc.n.u1User = 1;
2475
2476 /*
2477 * Allocate & map the page table.
2478 */
2479 PSHWPT pPTDst;
2480 PPGMPOOLPAGE pShwPage;
2481 RTGCPHYS GCPhys;
2482
2483 /* Virtual address = physical address */
2484 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK_32;
2485 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2486
2487 if ( rc == VINF_SUCCESS
2488 || rc == VINF_PGM_CACHED_PAGE)
2489 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2490 else
2491 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2492
2493 PdeDst.u &= X86_PDE_AVL_MASK;
2494 PdeDst.u |= pShwPage->Core.Key;
2495 PdeDst.n.u1Present = 1;
2496 PdeDst.n.u1Write = 1;
2497 PdeDst.n.u1User = 1;
2498 *pPdeDst = PdeDst;
2499
2500 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2501 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2502 return rc;
2503
2504#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2505 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2506 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2507 return VERR_INTERNAL_ERROR;
2508#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2509}
2510
2511
2512
2513/**
2514 * Prefetch a page/set of pages.
2515 *
2516 * Typically used to sync commonly used pages before entering raw mode
2517 * after a CR3 reload.
2518 *
2519 * @returns VBox status code.
2520 * @param pVM VM handle.
2521 * @param GCPtrPage Page to prefetch.
2522 */
2523PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2524{
2525 Assert(!HWACCMIsNestedPagingActive(pVM));
2526#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) \
2527 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2528 /*
2529 * Check that all Guest levels thru the PDE are present, getting the
2530 * PD and PDE in the process.
2531 */
2532 int rc = VINF_SUCCESS;
2533# if PGM_WITH_PAGING(PGM_GST_TYPE)
2534# if PGM_GST_TYPE == PGM_TYPE_32BIT
2535 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2536 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2537# else /* PAE */
2538 unsigned iPDSrc;
2539 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2540 if (!pPDSrc)
2541 return VINF_SUCCESS; /* not present */
2542# endif
2543 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2544# else
2545 PGSTPD pPDSrc = NULL;
2546 const unsigned iPDSrc = 0;
2547 GSTPDE PdeSrc;
2548
2549 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2550 PdeSrc.n.u1Present = 1;
2551 PdeSrc.n.u1Write = 1;
2552 PdeSrc.n.u1Accessed = 1;
2553 PdeSrc.n.u1User = 1;
2554# endif
2555
2556 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2557 {
2558# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2559 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2560# else
2561 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2562# endif
2563 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2564 {
2565 if (!PdeDst.n.u1Present)
2566 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2567 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2568 else
2569 {
2570 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2571 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2572 * makes no sense to prefetch more than one page.
2573 */
2574 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2575 if (VBOX_SUCCESS(rc))
2576 rc = VINF_SUCCESS;
2577 }
2578 }
2579 }
2580 return rc;
2581
2582#else /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2583
2584 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2585 return VERR_INTERNAL_ERROR;
2586#endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
2587}
2588
2589
2590
2591
2592/**
2593 * Syncs a page during a PGMVerifyAccess() call.
2594 *
2595 * @returns VBox status code (informational included).
 * @param pVM VM handle.
2596 * @param GCPtrPage The address of the page to sync.
2597 * @param fPage The effective guest page flags.
2598 * @param uErr The trap error code.
2599 */
2600PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2601{
2602 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2603
2604 Assert(!HWACCMIsNestedPagingActive(pVM));
2605#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE) \
2606 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2607
2608# ifndef IN_RING0
2609 if (!(fPage & X86_PTE_US))
2610 {
2611 /*
2612 * Mark this page as safe.
2613 */
2614 /** @todo not correct for pages that contain both code and data!! */
2615 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2616 CSAMMarkPage(pVM, (RTGCPTR)GCPtrPage, true);
2617 }
2618# endif
2619 /*
2620 * Get guest PD and index.
2621 */
2622
2623# if PGM_WITH_PAGING(PGM_GST_TYPE)
2624# if PGM_GST_TYPE == PGM_TYPE_32BIT
2625 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2626 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2627# else /* PAE */
2628 unsigned iPDSrc;
2629 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2630
2631 if (!pPDSrc)
2632 {
2633 Log(("PGMVerifyAccess: access violation for %VGv due to non-present PDPTR\n", GCPtrPage));
2634 return VINF_EM_RAW_GUEST_TRAP;
2635 }
2636# endif
2637# else
2638 PGSTPD pPDSrc = NULL;
2639 const unsigned iPDSrc = 0;
2640# endif
2641 int rc = VINF_SUCCESS;
2642
2643 /*
2644 * First check if the shadow pd is present.
2645 */
2646# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2647 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2648# else
2649 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2650# endif
2651 if (!pPdeDst->n.u1Present)
2652 {
2653 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2654 AssertRC(rc);
2655 if (rc != VINF_SUCCESS)
2656 return rc;
2657 }
2658
2659# if PGM_WITH_PAGING(PGM_GST_TYPE)
2660 /* Check for dirty bit fault */
2661 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2662 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2663 Log(("PGMVerifyAccess: success (dirty)\n"));
2664 else
2665 {
2666 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2667#else
2668 {
2669 GSTPDE PdeSrc;
2670 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2671 PdeSrc.n.u1Present = 1;
2672 PdeSrc.n.u1Write = 1;
2673 PdeSrc.n.u1Accessed = 1;
2674 PdeSrc.n.u1User = 1;
2675
2676#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2677 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2678 if (uErr & X86_TRAP_PF_US)
2679 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2680 else /* supervisor */
2681 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2682
2683 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2684 if (VBOX_SUCCESS(rc))
2685 {
2686 /* Page was successfully synced */
2687 Log2(("PGMVerifyAccess: success (sync)\n"));
2688 rc = VINF_SUCCESS;
2689 }
2690 else
2691 {
2692 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2693 return VINF_EM_RAW_GUEST_TRAP;
2694 }
2695 }
2696 return rc;
2697
2698#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2699
2700 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2701 return VERR_INTERNAL_ERROR;
2702#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2703}
2704
2705
2706#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
2707# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE
2708/**
2709 * Figures out which kind of shadow page this guest PDE warrants.
2710 *
2711 * @returns Shadow page kind.
2712 * @param pPdeSrc The guest PDE in question.
2713 * @param cr4 The current guest cr4 value.
2714 */
2715DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
2716{
2717 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2718 return BTH_PGMPOOLKIND_PT_FOR_PT;
2719 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2720 //{
2721 // case 0:
2722 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2723 // case X86_PDE4M_RW:
2724 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2725 // case X86_PDE4M_US:
2726 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2727 // case X86_PDE4M_RW | X86_PDE4M_US:
2728 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2729# if 0
2730 // case X86_PDE4M_PAE_NX:
2731 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2732 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2733 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2734 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2735 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2736 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2737 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2738# endif
2739 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2740 //}
2741}
2742# endif
2743#endif
2744
2745#undef MY_STAM_COUNTER_INC
2746#define MY_STAM_COUNTER_INC(a) do { } while (0)
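/* Note: the macro is redefined as a no-op, so the SyncCR3 statistics below are currently compiled out. */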
2747
2748
2749/**
2750 * Syncs the paging hierarchy starting at CR3.
2751 *
2752 * @returns VBox status code, no specials.
2753 * @param pVM The virtual machine.
2754 * @param cr0 Guest context CR0 register
2755 * @param cr3 Guest context CR3 register
2756 * @param cr4 Guest context CR4 register
2757 * @param fGlobal Including global page directories or not
2758 */
2759PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
2760{
2761#if PGM_SHW_TYPE == PGM_TYPE_NESTED
2762 /* @todo check if this is really necessary */
2763 HWACCMFlushTLB(pVM);
2764 return VINF_SUCCESS;
2765#else
2766 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
2767 fGlobal = true; /* Change this CR3 reload to be a global one. */
2768
2769 /*
2770 * Update page access handlers.
2771 * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
2772 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
2773 * have to look into that later because it will have a bad influence on performance.
2774 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
2775 * bird: Yes, but that won't work for aliases.
2776 */
2777 /** @todo this MUST go away. See #1557. */
2778 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2779 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
2780 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2781
2782#ifdef PGMPOOL_WITH_MONITORING
2783 /*
2784 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2785 * Occasionally we will have to clear all the shadow page tables because we wanted
2786 * to monitor a page which was mapped by too many shadowed page tables. This operation
2787 * is sometimes referred to as a 'lightweight flush'.
2788 */
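    /* Clearing the pool is only done in ring-3 here; elsewhere we return VINF_PGM_SYNC_CR3 below
       so the flush is redone once we're back in ring-3. */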
2789 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2790 pgmPoolMonitorModifiedClearAll(pVM);
2791 else
2792 {
2793# ifdef IN_RING3
2794 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2795 pgmPoolClearAll(pVM);
2796# else
2797 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2798 return VINF_PGM_SYNC_CR3;
2799# endif
2800 }
2801#endif
2802
2803 Assert(fGlobal || (cr4 & X86_CR4_PGE));
2804 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
2805
2806#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
2807 /*
2808 * Get page directory addresses.
2809 */
2810# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2811 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
2812# else /* PGM_SHW_TYPE == PGM_TYPE_PAE */
2813# if PGM_GST_TYPE == PGM_TYPE_32BIT
2814 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
2815# endif
2816# endif
2817
2818# if PGM_GST_TYPE == PGM_TYPE_32BIT
2819 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2820 Assert(pPDSrc);
2821# ifndef IN_GC
2822 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
2823# endif
2824# endif
2825
2826 /*
2827 * Iterate the page directory.
2828 */
2829 PPGMMAPPING pMapping;
2830 unsigned iPdNoMapping;
2831 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
2832 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2833
2834 /* Only check mappings if they are supposed to be put into the shadow page table. */
2835 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
2836 {
2837 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2838 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
2839 }
2840 else
2841 {
2842 pMapping = 0;
2843 iPdNoMapping = ~0U;
2844 }
2845# if PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2846 for (unsigned iPDPTE = 0; iPDPTE < GST_PDPE_ENTRIES; iPDPTE++)
2847 {
2848 unsigned iPDSrc;
2849# if PGM_SHW_TYPE == PGM_TYPE_PAE
2850 PX86PDPAE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2851# else
2852 AssertFailed(); /* @todo */
2853 PX86PDPE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[iPDPTE * X86_PG_AMD64_ENTRIES];
2854# endif
2855 PX86PDEPAE pPDEDst = &pPDPAE->a[iPDPTE * X86_PG_PAE_ENTRIES];
2856 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPDPTE << X86_PDPT_SHIFT, &iPDSrc);
2857
2858 if (pPDSrc == NULL)
2859 {
2860 /* PDPT not present */
2861 if (pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present)
2862 {
2863 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2864 {
2865 if ( pPDEDst[iPD].n.u1Present
2866 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
2867 {
2868 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst[iPD].u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPDPTE * X86_PG_PAE_ENTRIES + iPD);
2869 pPDEDst[iPD].u = 0;
2870 }
2871 }
2872 }
2873 if (!(pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].u & PGM_PLXFLAGS_MAPPING))
2874 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present = 0;
2875 continue;
2876 }
2877# else /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
2878 {
2879# endif /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
2880 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
2881 {
2882# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2883 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
2884# elif PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2885 AssertMsg(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst, ("%p vs %p\n", &pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512], pPDEDst));
2886# endif
2887 register GSTPDE PdeSrc = pPDSrc->a[iPD];
2888 if ( PdeSrc.n.u1Present
2889 && (PdeSrc.n.u1User || fRawR0Enabled))
2890 {
2891# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2892 || PGM_GST_TYPE == PGM_TYPE_PAE) \
2893 && !defined(PGM_WITHOUT_MAPPINGS)
2894
2895 /*
2896 * Check for conflicts with GC mappings.
2897 */
2898# if PGM_GST_TYPE == PGM_TYPE_PAE
2899 if (iPD + iPDPTE * X86_PG_PAE_ENTRIES == iPdNoMapping)
2900# else
2901 if (iPD == iPdNoMapping)
2902# endif
2903 {
2904 if (pVM->pgm.s.fMappingsFixed)
2905 {
2906 /* It's fixed, just skip the mapping. */
2907 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
2908 iPD += cPTs - 1;
2909 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
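                        /* With a 32-bit guest and a PAE shadow each guest PDE maps 4 MB but a shadow PDE
                           only 2 MB, so two shadow PDEs are consumed per guest PDE, hence the 2 * cPTs stride. */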
2910 pMapping = pMapping->CTXALLSUFF(pNext);
2911 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
2912 continue;
2913 }
2914# ifdef IN_RING3
2915# if PGM_GST_TYPE == PGM_TYPE_32BIT
2916 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
2917# elif PGM_GST_TYPE == PGM_TYPE_PAE
2918 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPDPTE << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
2919# endif
2920 if (VBOX_FAILURE(rc))
2921 return rc;
2922
2923 /*
2924 * Update iPdNoMapping and pMapping.
2925 */
2926 pMapping = pVM->pgm.s.pMappingsR3;
2927 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
2928 pMapping = pMapping->pNextR3;
2929 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
2930# else
2931 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
2932 return VINF_PGM_SYNC_CR3;
2933# endif
2934 }
2935# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
2936 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2937# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
2938 /*
2939 * Sync page directory entry.
2940 *
2941 * The current approach is to allocate the page table but to set
2942 * the entry to not-present and postpone the page table syncing till
2943 * it's actually used.
2944 */
2945# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2946 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
2947# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2948 const unsigned iPdShw = iPD + iPDPTE * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
2949# else
2950 const unsigned iPdShw = iPD; NOREF(iPdShw);
2951# endif
2952 {
2953 SHWPDE PdeDst = *pPDEDst;
2954 if (PdeDst.n.u1Present)
2955 {
2956 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2957 RTGCPHYS GCPhys;
2958 if ( !PdeSrc.b.u1Size
2959 || !(cr4 & X86_CR4_PSE))
2960 {
2961 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2962# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2963 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2964 GCPhys |= i * (PAGE_SIZE / 2);
2965# endif
2966 }
2967 else
2968 {
2969 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2970# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2971 /* Select the right PDE as we're emulating a guest 4 MB page with two 2 MB shadow PDEs. */
2972 GCPhys |= i * X86_PAGE_2M_SIZE;
2973# endif
2974 }
2975
2976 if ( pShwPage->GCPhys == GCPhys
2977 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
2978 && ( pShwPage->fCached
2979 || ( !fGlobal
2980 && ( false
2981# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
2982 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
2983 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
2984 || ( !pShwPage->fSeenNonGlobal
2985 && (cr4 & X86_CR4_PGE))
2986# endif
2987 )
2988 )
2989 )
2990 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
2991 || ( (cr4 & X86_CR4_PSE)
2992 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
2993 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
2994 )
2995 )
2996 {
2997# ifdef VBOX_WITH_STATISTICS
2998 if ( !fGlobal
2999 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3000 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
3001 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
3002 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
3003 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
3004 else
3005 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
3006# endif /* VBOX_WITH_STATISTICS */
3007 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
3008 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
3009 //# ifdef PGMPOOL_WITH_CACHE
3010 // pgmPoolCacheUsed(pPool, pShwPage);
3011 //# endif
3012 }
3013 else
3014 {
3015 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3016 pPDEDst->u = 0;
3017 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
3018 }
3019 }
3020 else
3021 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
3022 pPDEDst++;
3023 }
3024 }
3025# if PGM_GST_TYPE == PGM_TYPE_PAE
3026 else if (iPD + iPDPTE * X86_PG_PAE_ENTRIES != iPdNoMapping)
3027# else
3028 else if (iPD != iPdNoMapping)
3029# endif
3030 {
3031 /*
3032 * Check if there is any page directory to mark not present here.
3033 */
3034# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3035 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3036# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3037 const unsigned iPdShw = iPD + iPDPTE * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3038# else
3039 const unsigned iPdShw = iPD; NOREF(iPdShw);
3040# endif
3041 {
3042 if (pPDEDst->n.u1Present)
3043 {
3044 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
3045 pPDEDst->u = 0;
3046 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
3047 }
3048 pPDEDst++;
3049 }
3050 }
3051 else
3052 {
3053# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3054 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3055 && !defined(PGM_WITHOUT_MAPPINGS)
3056
3057 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3058
3059 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3060 if (pVM->pgm.s.fMappingsFixed)
3061 {
3062 /* It's fixed, just skip the mapping. */
3063 pMapping = pMapping->CTXALLSUFF(pNext);
3064 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3065 }
3066 else
3067 {
3068 /*
3069 * Check for conflicts for subsequent pagetables
3070 * and advance to the next mapping.
3071 */
3072 iPdNoMapping = ~0U;
3073 unsigned iPT = cPTs;
3074 while (iPT-- > 1)
3075 {
3076 if ( pPDSrc->a[iPD + iPT].n.u1Present
3077 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3078 {
3079# ifdef IN_RING3
3080# if PGM_GST_TYPE == PGM_TYPE_32BIT
3081 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3082# elif PGM_GST_TYPE == PGM_TYPE_PAE
3083 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPDPTE << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3084# endif
3085 if (VBOX_FAILURE(rc))
3086 return rc;
3087
3088 /*
3089 * Update iPdNoMapping and pMapping.
3090 */
3091 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
3092 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3093 pMapping = pMapping->CTXALLSUFF(pNext);
3094 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3095 break;
3096# else
3097 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3098 return VINF_PGM_SYNC_CR3;
3099# endif
3100 }
3101 }
3102 if (iPdNoMapping == ~0U && pMapping)
3103 {
3104 pMapping = pMapping->CTXALLSUFF(pNext);
3105 if (pMapping)
3106 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3107 }
3108 }
3109
3110 /* advance. */
3111 iPD += cPTs - 1;
3112 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the PAE shadow and 32-bit guest case. */
3113# if PGM_GST_TYPE != PGM_SHW_TYPE
3114 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3115# endif
3116# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3117 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3118# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3119 }
3120
3121 } /* for iPD */
3122 } /* for each PDPTE (PAE) */
3123
3124 return VINF_SUCCESS;
3125
3126# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3127//# error not implemented
3128 return VERR_INTERNAL_ERROR;
3129# else /* guest real and protected mode */
3130 return VINF_SUCCESS;
3131# endif
3132#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
3133}
3134
3135
3136
3137
3138#ifdef VBOX_STRICT
3139#ifdef IN_GC
3140# undef AssertMsgFailed
3141# define AssertMsgFailed Log
3142#endif
3143#ifdef IN_RING3
3144# include <VBox/dbgf.h>
3145
3146/**
3147 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3148 *
3149 * @returns VBox status code (VINF_SUCCESS).
3150 * @param pVM The VM handle.
3151 * @param cr3 The root of the hierarchy.
3152 * @param cr4 The CR4 register value; only the PAE and PSE bits are currently used.
3153 * @param fLongMode Set if long mode, false if not long mode.
3154 * @param cMaxDepth Number of levels to dump.
3155 * @param pHlp Pointer to the output functions.
3156 */
3157__BEGIN_DECLS
3158PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3159__END_DECLS
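/* Illustrative only, not part of the original file: a ring-3 caller could dump the current
 * hierarchy to the debug log roughly as sketched below. The CPUMGetGuestCR3/CR4 accessors
 * and the DBGFR3InfoLogHlp() output helper are assumed to be available in this build.
 *
 *     PGMR3DumpHierarchyHC(pVM, (uint32_t)CPUMGetGuestCR3(pVM), (uint32_t)CPUMGetGuestCR4(pVM),
 *                          false, 3, DBGFR3InfoLogHlp());    (fLongMode=false, cMaxDepth=3)
 */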
3160
3161#endif
3162
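/* Hypothetical usage sketch, not taken from this file: strict builds are expected to reach
 * this per-mode worker through the generic PGM entry point, along the lines of
 *
 *     unsigned cErrs = PGMAssertCR3(pVM, CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM));
 *     AssertMsg(!cErrs, ("PGMAssertCR3 found %u inconsistencies\n", cErrs));
 *
 * where PGMAssertCR3 is assumed to dispatch to PGM_BTH_NAME(AssertCR3) for the active
 * guest+shadow paging mode combination.
 */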
3163/**
3164 * Checks that the shadow page table is in sync with the guest one.
3165 *
3166 * @returns The number of errors.
3167 * @param pVM The virtual machine.
3168 * @param cr3 Guest context CR3 register
3169 * @param cr4 Guest context CR4 register
3170 * @param GCPtr Where to start. Defaults to 0.
3171 * @param cb How much to check. Defaults to everything.
3172 */
3173PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
3174{
3175#if PGM_SHW_TYPE == PGM_TYPE_NESTED
3176 return 0;
3177#else
3178 unsigned cErrors = 0;
3179
3180#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3181 || PGM_GST_TYPE == PGM_TYPE_PAE
3182
3183 PPGM pPGM = &pVM->pgm.s;
3184 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3185 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3186# ifndef IN_RING0
3187 RTHCPHYS HCPhys; /* general usage. */
3188# endif
3189 int rc;
3190
3191 /*
3192 * Check that the Guest CR3 and all its mappings are correct.
3193 */
3194 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3195 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3196 false);
3197# ifndef IN_RING0
3198# if PGM_GST_TYPE == PGM_TYPE_32BIT
3199 rc = PGMShwGetPage(pVM, pPGM->pGuestPDGC, NULL, &HCPhysShw);
3200# else
3201 rc = PGMShwGetPage(pVM, pPGM->pGstPaePDPTGC, NULL, &HCPhysShw);
3202# endif
3203 AssertRCReturn(rc, 1);
3204 HCPhys = NIL_RTHCPHYS;
3205 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3206 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhyswShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
3207# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3208 RTGCPHYS GCPhys;
3209 rc = PGMR3DbgHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
3210 AssertRCReturn(rc, 1);
3211 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
3212# endif
3213#endif /* !IN_RING0 */
3214
3215# if PGM_GST_TYPE == PGM_TYPE_32BIT
3216 const GSTPD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
3217# endif
3218
3219 /*
3220 * Get and check the Shadow CR3.
3221 */
3222# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3223 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
3224 unsigned cPDEs = ELEMENTS(pPDDst->a);
3225# else
3226 const X86PDPAE *pPDDst = pPGM->CTXMID(ap,PaePDs[0]); /* use it as a 2048 entry PD */
3227 unsigned cPDEs = ELEMENTS(pPDDst->a) * ELEMENTS(pPGM->apHCPaePDs);
3228# endif
3229 if (cb != ~(RTGCUINTPTR)0)
3230 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3231
3232/** @todo call the other two PGMAssert*() functions. */
3233
3234# if PGM_GST_TYPE == PGM_TYPE_PAE
3235 /*
3236 * Check the 4 PDPTs too.
3237 */
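     /* Each guest PDPTE must still match the host mapping (apGstPaePDsHC) and physical
        address (aGCPhysGstPaePDs) that PGM has cached for the guest PAE page directories. */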
3238 for (unsigned i = 0; i < 4; i++)
3239 {
3240 RTHCPTR HCPtr;
3241 RTHCPHYS HCPhys;
3242 RTGCPHYS GCPhys = pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[i].u & X86_PDPE_PG_MASK;
3243 int rc2 = pgmRamGCPhys2HCPtrAndHCPhysWithFlags(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
3244 if (VBOX_SUCCESS(rc2))
3245 {
3246 AssertMsg( pVM->pgm.s.apGstPaePDsHC[i] == (R3R0PTRTYPE(PX86PDPAE))HCPtr
3247 && pVM->pgm.s.aGCPhysGstPaePDs[i] == GCPhys,
3248 ("idx %d apGstPaePDsHC %VHv vs %VHv aGCPhysGstPaePDs %VGp vs %VGp\n",
3249 i, pVM->pgm.s.apGstPaePDsHC[i], HCPtr, pVM->pgm.s.aGCPhysGstPaePDs[i], GCPhys));
3250 }
3251 }
3252# endif
3253
3254 /*
3255 * Iterate the shadow page directory.
3256 */
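     /* Align GCPtr down to a shadow PDE boundary; iPDDst is the first index to check and,
        after the adjustment below, cPDEs is the index at which to stop. */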
3257 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3258 unsigned iPDDst = GCPtr >> SHW_PD_SHIFT;
3259 cPDEs += iPDDst;
3260 for (;
3261 iPDDst < cPDEs;
3262 iPDDst++, GCPtr += _4G / cPDEs)
3263 {
3264# if PGM_GST_TYPE == PGM_TYPE_PAE
3265 uint32_t iPDSrc;
3266 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pPGM, (RTGCUINTPTR)GCPtr, &iPDSrc);
3267 if (!pPDSrc)
3268 {
3269 AssertMsg(!pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtr >> GST_PDPT_SHIFT) & GST_PDPT_MASK].n.u1Present, ("Guest PDPTE not present, guest PDPTE=%VX64\n", pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtr >> GST_PDPT_SHIFT) & GST_PDPT_MASK].u));
3270 continue;
3271 }
3272#endif
3273
3274 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3275 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3276 {
3277 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3278 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3279 {
3280 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3281 cErrors++;
3282 continue;
3283 }
3284 }
3285 else if ( (PdeDst.u & X86_PDE_P)
3286 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3287 )
3288 {
3289 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3290 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3291 if (!pPoolPage)
3292 {
3293 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
3294 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3295 cErrors++;
3296 continue;
3297 }
3298 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3299
3300 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3301 {
3302 AssertMsgFailed(("PDE flags PWT and/or PCD are set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
3303 GCPtr, (uint64_t)PdeDst.u));
3304 cErrors++;
3305 }
3306
3307 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3308 {
3309 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
3310 GCPtr, (uint64_t)PdeDst.u));
3311 cErrors++;
3312 }
3313
3314 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3315 if (!PdeSrc.n.u1Present)
3316 {
3317 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3318 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3319 cErrors++;
3320 continue;
3321 }
3322
3323 if ( !PdeSrc.b.u1Size
3324 || !(cr4 & X86_CR4_PSE))
3325 {
3326 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3327# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3328 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3329# endif
3330 }
3331 else
3332 {
3333# if PGM_GST_TYPE == PGM_TYPE_32BIT
3334 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3335 {
3336 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3337 GCPtr, (uint64_t)PdeSrc.u));
3338 cErrors++;
3339 continue;
3340 }
3341# endif
3342 GCPhysGst = PdeSrc.u & GST_PDE_BIG_PG_MASK;
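     /* A 32-bit guest 4MB page is shadowed by two PAE 2MB PDEs; the odd shadow PDE maps
        the upper half, hence the 2MB (bit 21) adjustment below. */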
3343# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3344 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3345# endif
3346 }
3347
3348 if ( pPoolPage->enmKind
3349 != (!PdeSrc.b.u1Size || !(cr4 & X86_CR4_PSE) ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3350 {
3351 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3352 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3353 cErrors++;
3354 }
3355
3356 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3357 if (!pPhysPage)
3358 {
3359 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3360 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3361 cErrors++;
3362 continue;
3363 }
3364
3365 if (GCPhysGst != pPoolPage->GCPhys)
3366 {
3367 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3368 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3369 cErrors++;
3370 continue;
3371 }
3372
3373 if ( !PdeSrc.b.u1Size
3374 || !(cr4 & X86_CR4_PSE))
3375 {
3376 /*
3377 * Page Table.
3378 */
3379 const GSTPT *pPTSrc;
3380 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3381 if (VBOX_FAILURE(rc))
3382 {
3383 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3384 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3385 cErrors++;
3386 continue;
3387 }
3388 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3389 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3390 {
3391 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3392 // (This problem will go away when/if we shadow multiple CR3s.)
3393 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3394 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3395 cErrors++;
3396 continue;
3397 }
3398 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3399 {
3400 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3401 GCPtr, (uint64_t)PdeDst.u));
3402 cErrors++;
3403 continue;
3404 }
3405
3406 /* iterate the page table. */
3407# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3408 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
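     /* A 32-bit guest page table has 1024 entries while a PAE shadow page table has 512,
        so the odd shadow PDE covers guest PTEs 512..1023. */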
3409 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3410# else
3411 const unsigned offPTSrc = 0;
3412# endif
3413 for (unsigned iPT = 0, off = 0;
3414 iPT < ELEMENTS(pPTDst->a);
3415 iPT++, off += PAGE_SIZE)
3416 {
3417 const SHWPTE PteDst = pPTDst->a[iPT];
3418
3419 /* skip not-present entries. */
3420 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3421 continue;
3422 Assert(PteDst.n.u1Present);
3423
3424 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3425 if (!PteSrc.n.u1Present)
3426 {
3427#ifdef IN_RING3
3428 PGMAssertHandlerAndFlagsInSync(pVM);
3429 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3430#endif
3431 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3432 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3433 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3434 cErrors++;
3435 continue;
3436 }
3437
3438 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3439# if 1 /** @todo sync accessed bit properly... */
3440 fIgnoreFlags |= X86_PTE_A;
3441# endif
3442
3443 /* match the physical addresses */
3444 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3445 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3446
3447# ifdef IN_RING3
3448 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3449 if (VBOX_FAILURE(rc))
3450 {
3451 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3452 {
3453 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3454 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3455 cErrors++;
3456 continue;
3457 }
3458 }
3459 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3460 {
3461 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3462 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3463 cErrors++;
3464 continue;
3465 }
3466# endif
3467
3468 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3469 if (!pPhysPage)
3470 {
3471# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3472 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3473 {
3474 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3475 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3476 cErrors++;
3477 continue;
3478 }
3479# endif
3480 if (PteDst.n.u1Write)
3481 {
3482 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3483 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3484 cErrors++;
3485 }
3486 fIgnoreFlags |= X86_PTE_RW;
3487 }
3488 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
3489 {
3490 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3491 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3492 cErrors++;
3493 continue;
3494 }
3495
3496 /* flags */
3497 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3498 {
3499 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3500 {
3501 if (PteDst.n.u1Write)
3502 {
3503 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3504 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3505 cErrors++;
3506 continue;
3507 }
3508 fIgnoreFlags |= X86_PTE_RW;
3509 }
3510 else
3511 {
3512 if (PteDst.n.u1Present)
3513 {
3514 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3515 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3516 cErrors++;
3517 continue;
3518 }
3519 fIgnoreFlags |= X86_PTE_P;
3520 }
3521 }
3522 else
3523 {
3524 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3525 {
3526 if (PteDst.n.u1Write)
3527 {
3528 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3529 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3530 cErrors++;
3531 continue;
3532 }
3533 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3534 {
3535 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3536 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3537 cErrors++;
3538 continue;
3539 }
3540 if (PteDst.n.u1Dirty)
3541 {
3542 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3543 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3544 cErrors++;
3545 }
3546# if 0 /** @todo sync access bit properly... */
3547 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3548 {
3549 AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3550 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3551 cErrors++;
3552 }
3553 fIgnoreFlags |= X86_PTE_RW;
3554# else
3555 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3556# endif
3557 }
3558 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3559 {
3560 /* access bit emulation (not implemented). */
3561 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3562 {
3563 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3564 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3565 cErrors++;
3566 continue;
3567 }
3568 if (!PteDst.n.u1Accessed)
3569 {
3570 AssertMsgFailed(("!ACCESSED page at %VGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3571 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3572 cErrors++;
3573 }
3574 fIgnoreFlags |= X86_PTE_P;
3575 }
3576# ifdef DEBUG_sandervl
3577 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3578# endif
3579 }
3580
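     /* A shadow PTE that is read-only while the guest PTE is writable is tolerated
        (write access monitoring), hence the second comparison which additionally
        ignores the guest RW bit. */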
3581 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3582 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3583 )
3584 {
3585 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3586 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3587 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3588 cErrors++;
3589 continue;
3590 }
3591 } /* foreach PTE */
3592 }
3593 else
3594 {
3595 /*
3596 * Big Page.
3597 */
3598 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3599 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3600 {
3601 if (PdeDst.n.u1Write)
3602 {
3603 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3604 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3605 cErrors++;
3606 continue;
3607 }
3608 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3609 {
3610 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3611 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3612 cErrors++;
3613 continue;
3614 }
3615# if 0 /** @todo sync access bit properly... */
3616 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3617 {
3618 AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3619 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3620 cErrors++;
3621 }
3622 fIgnoreFlags |= X86_PTE_RW;
3623# else
3624 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3625# endif
3626 }
3627 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3628 {
3629 /* access bit emulation (not implemented). */
3630 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3631 {
3632 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3633 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3634 cErrors++;
3635 continue;
3636 }
3637 if (!PdeDst.n.u1Accessed)
3638 {
3639 AssertMsgFailed(("!ACCESSED page at %VGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3640 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3641 cErrors++;
3642 }
3643 fIgnoreFlags |= X86_PTE_P;
3644 }
3645
3646 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3647 {
3648 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3649 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3650 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3651 cErrors++;
3652 }
3653
3654 /* iterate the page table. */
3655 for (unsigned iPT = 0, off = 0;
3656 iPT < ELEMENTS(pPTDst->a);
3657 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3658 {
3659 const SHWPTE PteDst = pPTDst->a[iPT];
3660
3661 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3662 {
3663 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3664 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3665 cErrors++;
3666 }
3667
3668 /* skip not-present entries. */
3669 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3670 continue;
3671
3672 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3673
3674 /* match the physical addresses */
3675 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3676
3677# ifdef IN_RING3
3678 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3679 if (VBOX_FAILURE(rc))
3680 {
3681 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3682 {
3683 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3684 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3685 cErrors++;
3686 }
3687 }
3688 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3689 {
3690 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3691 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3692 cErrors++;
3693 continue;
3694 }
3695# endif
3696
3697 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3698 if (!pPhysPage)
3699 {
3700# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3701 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3702 {
3703 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3704 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3705 cErrors++;
3706 continue;
3707 }
3708# endif
3709 if (PteDst.n.u1Write)
3710 {
3711 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3712 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3713 cErrors++;
3714 }
3715 fIgnoreFlags |= X86_PTE_RW;
3716 }
3717 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
3718 {
3719 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3720 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3721 cErrors++;
3722 continue;
3723 }
3724
3725 /* flags */
3726 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3727 {
3728 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3729 {
3730 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
3731 {
3732 if (PteDst.n.u1Write)
3733 {
3734 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3735 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3736 cErrors++;
3737 continue;
3738 }
3739 fIgnoreFlags |= X86_PTE_RW;
3740 }
3741 }
3742 else
3743 {
3744 if (PteDst.n.u1Present)
3745 {
3746 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3747 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3748 cErrors++;
3749 continue;
3750 }
3751 fIgnoreFlags |= X86_PTE_P;
3752 }
3753 }
3754
3755 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3756 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
3757 )
3758 {
3759 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
3760 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3761 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3762 cErrors++;
3763 continue;
3764 }
3765 } /* foreach PTE */
3766 }
3767 }
3768 /* not present */
3769
3770 } /* foreach PDE */
3771
3772# ifdef DEBUG
3773 if (cErrors)
3774 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
3775# endif
3776
3777#elif PGM_GST_TYPE == PGM_TYPE_PAE
3778//# error not implemented
3779
3780
3781#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3782//# error not implemented
3783
3784/*#else: guest real and protected mode */
3785#endif
3786 return cErrors;
3787
3788#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
3789}
3790#endif /* VBOX_STRICT */
3791