VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@9570

Last change on this file was revision 9570, checked in by vboxsync, 17 years ago

AMD64 paging updates

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 170.3 KB
1/* $Id: PGMAllBth.h 9570 2008-06-10 13:30:03Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42__END_DECLS
43
44
45/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
46#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED
47# error "Invalid combination; PAE guest implies PAE shadow"
48#endif
49
50#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
51 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED)
52# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
53#endif
54
55#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
56 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED)
57# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED) \
61 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
62# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
63#endif
64
65#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
66# define PGM_WITHOUT_MAPPINGS
67#endif
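/*
 * A rough sketch of how this "both" (shadow+guest) template is instantiated: the including
 * file defines PGM_GST_TYPE, PGM_SHW_TYPE and the PGM_BTH_NAME / PGM_BTH_DECL name-mangling
 * macros, then includes this header once per legal combination. The macro bodies below are
 * assumptions for illustration only; the real wiring lives in PGMAll.cpp and PGMInternal.h.
 */
#if 0 /* illustration only, never compiled */
# define PGM_SHW_TYPE               PGM_TYPE_PAE
# define PGM_GST_TYPE               PGM_TYPE_32BIT
# define PGM_BTH_NAME(name)         pgmBthPae32Bit##name          /* assumed mangling scheme */
# define PGM_BTH_DECL(type, name)   type PGM_BTH_NAME(name)       /* assumed declaration helper */
# include "PGMAllBth.h"
# undef PGM_BTH_DECL
# undef PGM_BTH_NAME
# undef PGM_GST_TYPE
# undef PGM_SHW_TYPE
#endif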
68
69/**
70 * #PF Handler for raw-mode guest execution.
71 *
72 * @returns VBox status code (appropriate for trap handling and GC return).
73 * @param pVM VM Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 */
78PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
79{
80#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
81 && PGM_SHW_TYPE != PGM_TYPE_NESTED
82
83# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
84 /*
85 * Hide the instruction fetch trap indicator for now.
86 */
87 /** @todo NXE will change this and we must fix NXE in the switcher too! */
88 if (uErr & X86_TRAP_PF_ID)
89 {
90 uErr &= ~X86_TRAP_PF_ID;
91 TRPMSetErrorCode(pVM, uErr);
92 }
93# endif
94
95 /*
96 * Get PDs.
97 */
98 int rc;
99# if PGM_WITH_PAGING(PGM_GST_TYPE)
100# if PGM_GST_TYPE == PGM_TYPE_32BIT
101 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
102 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
103
104# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
105
106# if PGM_GST_TYPE == PGM_TYPE_PAE
107 unsigned iPDSrc;
108 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, (RTGCUINTPTR)pvFault, &iPDSrc);
109
110# elif PGM_GST_TYPE == PGM_TYPE_AMD64
111 unsigned iPDSrc;
112 PX86PML4E pPml4e;
113 X86PDPE Pdpe;
114 PGSTPD pPDSrc;
115
116 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4e, &Pdpe, &iPDSrc);
117 Assert(pPml4e);
118# endif
119 /* Quick check for a valid guest trap. */
120 if (!pPDSrc)
121 {
122 LogFlow(("Trap0eHandler: guest PDPTR not present CR3=%VGp\n", (CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK)));
123 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eGuestTrap; });
124 TRPMSetErrorCode(pVM, uErr);
125 return VINF_EM_RAW_GUEST_TRAP;
126 }
127# endif
128# else
129 PGSTPD pPDSrc = NULL;
130 const unsigned iPDSrc = 0;
131# endif
132
133# if PGM_SHW_TYPE == PGM_TYPE_32BIT
134 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
135 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
136# elif PGM_SHW_TYPE == PGM_TYPE_PAE
137 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
138 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries, so there is no need to AND with SHW_PD_MASK to get iPDDst. */
139
140# if PGM_GST_TYPE == PGM_TYPE_PAE
141 /* Did we mark the PDPT as not present in SyncCR3? */
142 unsigned iPDPTE = ((RTGCUINTPTR)pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
143 if (!pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present)
144 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present = 1;
145
146# endif
147
148# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
149 const unsigned iPDDst = (((RTGCUINTPTR)pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
150 PX86PDPAE pPDDst;
151
152 rc = PGMShwGetAllocLongModePDPtr(pVM, (RTGCUINTPTR)pvFault, &pPDDst);
153 if (rc != VINF_SUCCESS)
154 {
155 AssertMsg(rc == VINF_PGM_SYNC_CR3, ("Unexpected rc=%Vrc\n", rc));
156 return rc;
157 }
158 Assert(pPDDst);
159# endif
160
161# if PGM_WITH_PAGING(PGM_GST_TYPE)
162 /*
163 * If we successfully correct the write protection fault due to dirty bit
164 * tracking, or this page fault is a genuine one, then return immediately.
165 */
166 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
167 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
168 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
169 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
170 || rc == VINF_EM_RAW_GUEST_TRAP)
171 {
172 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
173 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
174 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
175 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
176 }
177
178 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
179# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
180
181 /*
182 * A common case is the not-present error caused by lazy page table syncing.
183 *
184 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
185 * so we can safely assume that the shadow PT is present when calling SyncPage later.
186 *
187 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
188 * of mapping conflict and defer to SyncCR3 in R3.
189 * (Again, we do NOT support access handlers for non-present guest pages.)
190 *
191 */
192# if PGM_WITH_PAGING(PGM_GST_TYPE)
193 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
194# else
195 GSTPDE PdeSrc;
196 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
197 PdeSrc.n.u1Present = 1;
198 PdeSrc.n.u1Write = 1;
199 PdeSrc.n.u1Accessed = 1;
200 PdeSrc.n.u1User = 1;
201# endif
202 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
203 && !pPDDst->a[iPDDst].n.u1Present
204 && PdeSrc.n.u1Present
205 )
206
207 {
208 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
209 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
210 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
211 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
212 if (VBOX_SUCCESS(rc))
213 {
214 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
215 return rc;
216 }
217 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
218 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
219 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
220 return VINF_PGM_SYNC_CR3;
221 }
222
223# if PGM_WITH_PAGING(PGM_GST_TYPE)
224 /*
225 * Check if this address is within any of our mappings.
226 *
227 * This is *very* fast and it's gonna save us a bit of effort below and prevent
228 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
229 * (BTW, it's impossible to have physical access handlers in a mapping.)
230 */
231 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
232 {
233 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
234 PPGMMAPPING pMapping = CTXALLSUFF(pVM->pgm.s.pMappings);
235 for ( ; pMapping; pMapping = CTXALLSUFF(pMapping->pNext))
236 {
237 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
238 break;
239 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
240 {
241 /*
242 * The first thing we check is if we've got an undetected conflict.
243 */
244 if (!pVM->pgm.s.fMappingsFixed)
245 {
246 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
247 while (iPT-- > 0)
248 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
249 {
250 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
251 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
252 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
253 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
254 return VINF_PGM_SYNC_CR3;
255 }
256 }
257
258 /*
259 * Check if the fault address is in a virtual page access handler range.
260 */
261 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->HyperVirtHandlers, pvFault);
262 if ( pCur
263 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
264 && uErr & X86_TRAP_PF_RW)
265 {
266# ifdef IN_GC
267 STAM_PROFILE_START(&pCur->Stat, h);
268 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
269 STAM_PROFILE_STOP(&pCur->Stat, h);
270# else
271 AssertFailed();
272 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
273# endif
274 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
275 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
276 return rc;
277 }
278
279 /*
280 * Pretend we're not here and let the guest handle the trap.
281 */
282 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
283 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
284 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
285 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
286 return VINF_EM_RAW_GUEST_TRAP;
287 }
288 }
289 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
290 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
291# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
292
293 /*
294 * Check if this fault address is flagged for special treatment,
295 * which means we'll have to figure out the physical address and
296 * check flags associated with it.
297 *
298 * ASSUME that we can limit any special access handling to pages
299 * in page tables which the guest believes to be present.
300 */
301 if (PdeSrc.n.u1Present)
302 {
303 RTGCPHYS GCPhys = NIL_RTGCPHYS;
304
305# if PGM_WITH_PAGING(PGM_GST_TYPE)
306 uint32_t cr4 = CPUMGetGuestCR4(pVM);
307 if ( PdeSrc.b.u1Size
308 && (cr4 & X86_CR4_PSE))
309 GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK)
310 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
311 else
312 {
313 PGSTPT pPTSrc;
314 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
315 if (VBOX_SUCCESS(rc))
316 {
317 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
318 if (pPTSrc->a[iPTESrc].n.u1Present)
319 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
320 }
321 }
322# else
323 /* No paging so the fault address is the physical address */
324 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
325# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
326
327 /*
328 * If we have a GC address we'll check if it has any flags set.
329 */
330 if (GCPhys != NIL_RTGCPHYS)
331 {
332 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
333
334 PPGMPAGE pPage;
335 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
336 if (VBOX_SUCCESS(rc))
337 {
338 if (PGM_PAGE_HAS_ANY_HANDLERS(pPage))
339 {
340 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
341 {
342 /*
343 * Physical page access handler.
344 */
345 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
346 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
347 if (pCur)
348 {
349# ifdef PGM_SYNC_N_PAGES
350 /*
351 * If the region is write protected and we got a page not present fault, then sync
352 * the pages. If the fault was caused by a read, then restart the instruction.
353 * In case of write access continue to the GC write handler.
354 *
355 * ASSUMES that there is only one handler per page or that they have similar write properties.
356 */
357 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
358 && !(uErr & X86_TRAP_PF_P))
359 {
360 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
361 if ( VBOX_FAILURE(rc)
362 || !(uErr & X86_TRAP_PF_RW)
363 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
364 {
365 AssertRC(rc);
366 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
367 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
368 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
369 return rc;
370 }
371 }
372# endif
373
374 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
375 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
376 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
377
378#if defined(IN_GC) || defined(IN_RING0)
379 if (CTXALLSUFF(pCur->pfnHandler))
380 {
381 STAM_PROFILE_START(&pCur->Stat, h);
382 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
383 STAM_PROFILE_STOP(&pCur->Stat, h);
384 }
385 else
386#endif
387 rc = VINF_EM_RAW_EMULATE_INSTR;
388 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
389 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
390 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
391 return rc;
392 }
393 }
394# if PGM_WITH_PAGING(PGM_GST_TYPE)
395 else
396 {
397# ifdef PGM_SYNC_N_PAGES
398 /*
399 * If the region is write protected and we got a page not present fault, then sync
400 * the pages. If the fault was caused by a read, then restart the instruction.
401 * In case of write access continue to the GC write handler.
402 */
403 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
404 && !(uErr & X86_TRAP_PF_P))
405 {
406 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
407 if ( VBOX_FAILURE(rc)
408 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
409 || !(uErr & X86_TRAP_PF_RW))
410 {
411 AssertRC(rc);
412 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
413 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
414 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
415 return rc;
416 }
417 }
418# endif
419 /*
420 * Ok, it's a virtual page access handler.
421 *
422 * Since it's faster to search by address, we'll do that first
423 * and then retry by GCPhys if that fails.
424 */
425 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
426 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
427 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
428 */
429 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
430 if (pCur)
431 {
432 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
433 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
434 || !(uErr & X86_TRAP_PF_P)
435 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
436 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
437
438 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
439 && ( uErr & X86_TRAP_PF_RW
440 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
441 {
442# ifdef IN_GC
443 STAM_PROFILE_START(&pCur->Stat, h);
444 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
445 STAM_PROFILE_STOP(&pCur->Stat, h);
446# else
447 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
448# endif
449 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
450 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
451 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
452 return rc;
453 }
454 /* Unhandled part of a monitored page */
455 }
456 else
457 {
458 /* Check by physical address. */
459 PPGMVIRTHANDLER pCur;
460 unsigned iPage;
461 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
462 &pCur, &iPage);
463 Assert(VBOX_SUCCESS(rc) || !pCur);
464 if ( pCur
465 && ( uErr & X86_TRAP_PF_RW
466 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
467 {
468 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
469# ifdef IN_GC
470 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
471 Assert(off < pCur->cb);
472 STAM_PROFILE_START(&pCur->Stat, h);
473 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
474 STAM_PROFILE_STOP(&pCur->Stat, h);
475# else
476 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
477# endif
478 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
479 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
480 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
481 return rc;
482 }
483 }
484 }
485# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
486
487 /*
488 * There is a handled area of the page, but this fault doesn't belong to it.
489 * We must emulate the instruction.
490 *
491 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
492 * we first check if this was a page-not-present fault for a page with only
493 * write access handlers. Restart the instruction if it wasn't a write access.
494 */
495 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
496
497 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
498 && !(uErr & X86_TRAP_PF_P))
499 {
500 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
501 if ( VBOX_FAILURE(rc)
502 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
503 || !(uErr & X86_TRAP_PF_RW))
504 {
505 AssertRC(rc);
506 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
507 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
508 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
509 return rc;
510 }
511 }
512
513 /** @todo This particular case can cause quite a lot of overhead, e.g. the early stage of kernel booting in Ubuntu 6.06,
514 * which writes to an unhandled part of the LDT page several million times.
515 */
516 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
517 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%RHp%s%s\n",
518 rc, pPage->HCPhys,
519 PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage) ? " phys" : "",
520 PGM_PAGE_HAS_ANY_VIRTUAL_HANDLERS(pPage) ? " virt" : ""));
521 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
522 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
523 return rc;
524 } /* if any kind of handler */
525
526# if PGM_WITH_PAGING(PGM_GST_TYPE)
527 if (uErr & X86_TRAP_PF_P)
528 {
529 /*
530 * The page isn't marked, but it might still be monitored by a virtual page access handler.
531 * (ASSUMES no temporary disabling of virtual handlers.)
532 */
533 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
534 * we should correct both the shadow page table and physical memory flags, and not only check for
535 * accesses within the handler region but for access to pages with virtual handlers. */
536 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
537 if (pCur)
538 {
539 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
540 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
541 || !(uErr & X86_TRAP_PF_P)
542 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
543 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
544
545 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
546 && ( uErr & X86_TRAP_PF_RW
547 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
548 {
549# ifdef IN_GC
550 STAM_PROFILE_START(&pCur->Stat, h);
551 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
552 STAM_PROFILE_STOP(&pCur->Stat, h);
553# else
554 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
555# endif
556 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
557 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
558 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
559 return rc;
560 }
561 }
562 }
563# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
564 }
565 else
566 {
567 /* When the guest accesses invalid physical memory (e.g. probing RAM or accessing a remapped MMIO range), we'll fall
568 * back to the recompiler to emulate the instruction.
569 */
570 LogFlow(("pgmPhysGetPageEx %VGp failed with %Vrc\n", GCPhys, rc));
571 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersInvalid);
572 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
573 return VINF_EM_RAW_EMULATE_INSTR;
574 }
575
576 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
577
578# ifdef PGM_OUT_OF_SYNC_IN_GC
579 /*
580 * We get here only if the page is present in the guest page tables and the trap was
581 * not handled by our handlers.
582 * Check for a page out-of-sync situation.
583 */
584 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
585
586 if (!(uErr & X86_TRAP_PF_P))
587 {
588 /*
589 * Page is not present in our page tables.
590 * Try to sync it!
591 * BTW, fPageShw is invalid in this branch!
592 */
593 if (uErr & X86_TRAP_PF_US)
594 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
595 else /* supervisor */
596 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
597
598# if defined(LOG_ENABLED) && !defined(IN_RING0)
599 RTGCPHYS GCPhys;
600 uint64_t fPageGst;
601 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
602 Log(("Page out of sync: %VGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
603 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
604# endif /* LOG_ENABLED */
605
606# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
607 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
608 {
609 uint64_t fPageGst;
610 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
611 if ( VBOX_SUCCESS(rc)
612 && !(fPageGst & X86_PTE_US))
613 {
614 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
615 if ( pvFault == (RTGCPTR)pRegFrame->eip
616 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
617# ifdef CSAM_DETECT_NEW_CODE_PAGES
618 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
619 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
620# endif /* CSAM_DETECT_NEW_CODE_PAGES */
621 )
622 {
623 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
624 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
625 if (rc != VINF_SUCCESS)
626 {
627 /*
628 * CSAM needs to perform a job in ring 3.
629 *
630 * Sync the page before going to the host context; otherwise we'll end up in a loop if
631 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
632 */
633 LogFlow(("CSAM ring 3 job\n"));
634 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
635 AssertRC(rc2);
636
637 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
638 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
639 return rc;
640 }
641 }
642# ifdef CSAM_DETECT_NEW_CODE_PAGES
643 else
644 if ( uErr == X86_TRAP_PF_RW
645 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
646 && pRegFrame->ecx < 0x10000
647 )
648 {
649 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
650 * to detect loading of new code pages.
651 */
652
653 /*
654 * Decode the instruction.
655 */
656 RTGCPTR PC;
657 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
658 if (rc == VINF_SUCCESS)
659 {
660 DISCPUSTATE Cpu;
661 uint32_t cbOp;
662 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
663
664 /* For now we'll restrict this to rep movsw/d instructions */
665 if ( rc == VINF_SUCCESS
666 && Cpu.pCurInstr->opcode == OP_MOVSWD
667 && (Cpu.prefix & PREFIX_REP))
668 {
669 CSAMMarkPossibleCodePage(pVM, pvFault);
670 }
671 }
672 }
673# endif /* CSAM_DETECT_NEW_CODE_PAGES */
674
675 /*
676 * Mark this page as safe.
677 */
678 /** @todo not correct for pages that contain both code and data!! */
679 Log2(("CSAMMarkPage %VGv; scanned=%d\n", pvFault, true));
680 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
681 }
682 }
683# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0) */
684 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
685 if (VBOX_SUCCESS(rc))
686 {
687 /* The page was successfully synced, return to the guest. */
688 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
689 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
690 return VINF_SUCCESS;
691 }
692 }
693 else
694 {
695 /*
696 * A side effect of not flushing global PDEs is out-of-sync pages due
697 * to physically monitored regions that are no longer valid.
698 * Assume for now that it only applies to the read/write flag.
699 */
700 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
701 {
702 if (uErr & X86_TRAP_PF_US)
703 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
704 else /* supervisor */
705 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
706
707
708 /*
709 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
710 */
711 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
712 if (VBOX_SUCCESS(rc))
713 {
714 /*
715 * Page was successfully synced, return to guest.
716 */
717# ifdef VBOX_STRICT
718 RTGCPHYS GCPhys;
719 uint64_t fPageGst;
720 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
721 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
722 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
723
724 uint64_t fPageShw;
725 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
726 AssertMsg(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Vrc fPageShw=%VX64\n", rc, fPageShw));
727# endif /* VBOX_STRICT */
728 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
729 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
730 return VINF_SUCCESS;
731 }
732
733 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
734 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
735 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP|X86_CR0_PG)) == X86_CR0_PG)
736 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
737 {
738 uint64_t fPageGst;
739 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
740 if ( VBOX_SUCCESS(rc)
741 && !(fPageGst & X86_PTE_RW))
742 {
743 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
744 if (VBOX_SUCCESS(rc))
745 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulGC);
746 else
747 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulR3);
748 return rc;
749 }
750 else
751 AssertMsgFailed(("Unexpected r/w page %x flag=%x\n", pvFault, (uint32_t)fPageGst));
752 }
753
754 }
755
756# if PGM_WITH_PAGING(PGM_GST_TYPE)
757# ifdef VBOX_STRICT
758 /*
759 * Check for VMM page flags vs. Guest page flags consistency.
760 * Currently only for debug purposes.
761 */
762 if (VBOX_SUCCESS(rc))
763 {
764 /* Get guest page flags. */
765 uint64_t fPageGst;
766 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
767 if (VBOX_SUCCESS(rc))
768 {
769 uint64_t fPageShw;
770 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
771
772 /*
773 * Compare page flags.
774 * Note: we have AVL, A, D bits desynched.
775 */
776 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
777 ("Page flags mismatch! pvFault=%VGv GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
778 }
779 else
780 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
781 }
782 else
783 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
784# endif /* VBOX_STRICT */
785# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
786 }
787 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
788# endif /* PGM_OUT_OF_SYNC_IN_GC */
789 }
790 else
791 {
792 /*
793 * Page not present in Guest OS or invalid page table address.
794 * This is potential virtual page access handler food.
795 *
796 * For the present we'll say that our access handlers don't
797 * work for this case - we've already discarded the page table
798 * not present case which is identical to this.
799 *
800 * When we perchance find we need this, we will probably have AVL
801 * trees (offset based) to operate on and we can measure their speed
802 * against mapping a page table and probably rearrange this handling
803 * a bit. (Like, searching virtual ranges before checking the
804 * physical address.)
805 */
806 }
807 }
808
809
810# if PGM_WITH_PAGING(PGM_GST_TYPE)
811 /*
812 * Conclusion, this is a guest trap.
813 */
814 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
815 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
816 return VINF_EM_RAW_GUEST_TRAP;
817# else
818 /* present, but not a monitored page; perhaps the guest is probing physical memory */
819 return VINF_EM_RAW_EMULATE_INSTR;
820# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
821
822
823#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
824
825 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
826 return VERR_INTERNAL_ERROR;
827#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
828}
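/*
 * For reference, a minimal decoder for the #PF error code bits tested throughout the handler
 * above. The bit meanings follow the x86 architecture (P, R/W, U/S, RSVD and I/D in bits 0-4);
 * the constants are assumed to match the X86_TRAP_PF_* definitions in x86.h.
 */
#if 0 /* illustration only, never compiled */
static void pgmBthSketchLogPageFaultErrCode(uint32_t uErr)
{
    Log(("#PF uErr=%#x: %s, %s access, %s mode%s%s\n", uErr,
         uErr & X86_TRAP_PF_P    ? "protection violation" : "page not present",
         uErr & X86_TRAP_PF_RW   ? "write"                : "read",
         uErr & X86_TRAP_PF_US   ? "user"                 : "supervisor",
         uErr & X86_TRAP_PF_RSVD ? ", reserved bit set"   : "",
         uErr & X86_TRAP_PF_ID   ? ", instruction fetch"  : ""));
}
#endif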
829
830
831/**
832 * Emulation of the invlpg instruction.
833 *
834 *
835 * @returns VBox status code.
836 *
837 * @param pVM VM handle.
838 * @param GCPtrPage Page to invalidate.
839 *
840 * @remark ASSUMES that the guest is updating before invalidating. This order
841 * isn't required by the CPU, so this is speculative and could cause
842 * trouble.
843 *
844 * @todo Flush page or page directory only if necessary!
845 * @todo Add a #define for simply invalidating the page.
846 */
847PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
848{
849#if PGM_WITH_PAGING(PGM_GST_TYPE) \
850 && PGM_SHW_TYPE != PGM_TYPE_NESTED
851 int rc;
852
853 LogFlow(("InvalidatePage %x\n", GCPtrPage));
854 /*
855 * Get the shadow PD entry and skip out if this PD isn't present.
856 * (Guessing that a shadow PDE is frequently not present, we do this first.)
857 */
858 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
859# if PGM_SHW_TYPE == PGM_TYPE_32BIT
860 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
861# elif PGM_SHW_TYPE == PGM_TYPE_PAE
862 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
863# else /* AMD64 */
864 /* PML4 */
865 const unsigned iPml4 = ((RTGCUINTPTR64)GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
866 PX86PML4E pPml4eDst = &CTXMID(pVM->pgm.s.p,PaePML4)->a[iPml4];
867 if (!pPml4eDst->n.u1Present)
868 {
869 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
870 return VINF_SUCCESS;
871 }
872
873 /* PDPT */
874 PX86PDPT pPDPT;
875 rc = PGM_HCPHYS_2_PTR(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, &pPDPT);
876 if (VBOX_FAILURE(rc))
877 return rc;
878 const unsigned iPDPT = ((RTGCUINTPTR64)GCPtrPage >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
879 PX86PDPE pPdpeDst = &pPDPT->a[iPDPT];
880 if (!pPdpeDst->n.u1Present)
881 {
882 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
883 return VINF_SUCCESS;
884 }
885
886 /* PD */
887 PX86PDPAE pPd;
888 rc = PGM_HCPHYS_2_PTR(pVM, pPdpeDst->u & X86_PDPE_PG_MASK, &pPd);
889 if (VBOX_FAILURE(rc))
890 return rc;
891 const unsigned iPd = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
892 PX86PDEPAE pPdeDst = &pPd->a[iPd];
893# endif
894
895 const SHWPDE PdeDst = *pPdeDst;
896 if (!PdeDst.n.u1Present)
897 {
898 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
899 return VINF_SUCCESS;
900 }
901
902 /*
903 * Get the guest PD entry and calc big page.
904 */
905# if PGM_GST_TYPE == PGM_TYPE_32BIT
906 PX86PD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
907 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
908 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
909# else
910 unsigned iPDSrc;
911# if PGM_GST_TYPE == PGM_TYPE_PAE
912 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
913# else /* AMD64 */
914 PX86PML4E pPml4eSrc;
915 X86PDPE PdpeSrc;
916 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
917# endif
918 GSTPDE PdeSrc;
919
920 if (pPDSrc)
921 PdeSrc = pPDSrc->a[iPDSrc];
922 else
923 PdeSrc.u = 0;
924# endif
925
926 const uint32_t cr4 = CPUMGetGuestCR4(pVM);
927 const bool fIsBigPage = PdeSrc.b.u1Size && (cr4 & X86_CR4_PSE);
928
929# ifdef IN_RING3
930 /*
931 * If a CR3 Sync is pending we may ignore the invalidate page operation
932 * depending on the kind of sync and whether or not it's a global page.
933 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
934 */
935# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
936 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
937 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
938 && fIsBigPage
939 && PdeSrc.b.u1Global
940 && (cr4 & X86_CR4_PGE)
941 )
942 )
943# else
944 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
945# endif
946 {
947 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
948 return VINF_SUCCESS;
949 }
950# endif /* IN_RING3 */
951
952
953# if PGM_GST_TYPE == PGM_TYPE_AMD64
954 Assert(pPml4eDst->n.u1Present && pPml4eDst->u & SHW_PDPT_MASK);
955 if (pPml4eSrc->n.u1Present)
956 {
957 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
958 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
959 {
960 /*
961 * Mark not present so we can resync the PML4E when it's used.
962 */
963 LogFlow(("InvalidatePage: Out-of-sync PML4E at %VGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
964 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
965 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
966 pPml4eDst->u = 0;
967 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
968 PGM_INVL_GUEST_TLBS();
969 }
970 else if (!pPml4eSrc->n.u1Accessed)
971 {
972 /*
973 * Mark not present so we can set the accessed bit.
974 */
975 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
976 pPml4eDst->u = 0;
977 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
978 PGM_INVL_GUEST_TLBS();
979 }
980 }
981 else
982 {
983 pgmPoolFree(pVM, pPml4eDst->u & X86_PML4E_PG_MASK, PGMPOOL_IDX_PML4, iPml4);
984 pPml4eDst->u = 0;
985 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
986 PGM_INVL_PG(GCPtrPage);
987 return VINF_SUCCESS;
988 }
989
990 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
991 if (PdpeSrc.n.u1Present)
992 {
993 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
994 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
995 {
996 /*
997 * Mark not present so we can resync the PDPE when it's used.
998 */
999 LogFlow(("InvalidatePage: Out-of-sync PDPE at %VGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1000 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1001 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PML4, iPml4);
1002 pPdpeDst->u = 0;
1003 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1004 PGM_INVL_GUEST_TLBS();
1005 }
1006 else if (!PdpeSrc.lm.u1Accessed)
1007 {
1008 /*
1009 * Mark not present so we can set the accessed bit.
1010 */
1011 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PML4, iPml4);
1012 pPdpeDst->u = 0;
1013 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1014 PGM_INVL_GUEST_TLBS();
1015 }
1016 }
1017 else
1018 {
1019 pgmPoolFree(pVM, pPdpeDst->u & SHW_PDPT_MASK, PGMPOOL_IDX_PDPT, iPDDst);
1020 pPdpeDst->u = 0;
1021 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1022 PGM_INVL_PG(GCPtrPage);
1023 return VINF_SUCCESS;
1024 }
1025# endif
1026
1027 /*
1028 * Deal with the Guest PDE.
1029 */
1030 rc = VINF_SUCCESS;
1031 if (PdeSrc.n.u1Present)
1032 {
1033 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1034 {
1035 /*
1036 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1037 */
1038 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1039 Assert(PGMGetGuestMode(pVM) <= PGMMODE_32_BIT);
1040 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1041 }
1042 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1043 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1044 {
1045 /*
1046 * Mark not present so we can resync the PDE when it's used.
1047 */
1048 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1049 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1050 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1051 pPdeDst->u = 0;
1052 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1053 PGM_INVL_GUEST_TLBS();
1054 }
1055 else if (!PdeSrc.n.u1Accessed)
1056 {
1057 /*
1058 * Mark not present so we can set the accessed bit.
1059 */
1060 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1061 pPdeDst->u = 0;
1062 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1063 PGM_INVL_GUEST_TLBS();
1064 }
1065 else if (!fIsBigPage)
1066 {
1067 /*
1068 * 4KB - page.
1069 */
1070 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1071 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1072# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1073 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1074 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1075# endif
1076 if (pShwPage->GCPhys == GCPhys)
1077 {
1078# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1079 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1080 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1081 if (pPT->a[iPTEDst].n.u1Present)
1082 {
1083# ifdef PGMPOOL_WITH_USER_TRACKING
1084 /* This is very unlikely with caching/monitoring enabled. */
1085 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1086# endif
1087 pPT->a[iPTEDst].u = 0;
1088 }
1089# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1090 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1091 if (VBOX_SUCCESS(rc))
1092 rc = VINF_SUCCESS;
1093# endif
1094 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
1095 PGM_INVL_PG(GCPtrPage);
1096 }
1097 else
1098 {
1099 /*
1100 * The page table address changed.
1101 */
1102 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
1103 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1104 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1105 pPdeDst->u = 0;
1106 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1107 PGM_INVL_GUEST_TLBS();
1108 }
1109 }
1110 else
1111 {
1112 /*
1113 * 2/4MB - page.
1114 */
1115 /* Before freeing the page, check if anything really changed. */
1116 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1117 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1118# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1119 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1120 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1121# endif
1122 if ( pShwPage->GCPhys == GCPhys
1123 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1124 {
1125 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1126 /** @todo PAT */
1127 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1128 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1129 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1130 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1131 {
1132 LogFlow(("Skipping flush for big page containing %VGv (PD=%X .u=%VX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1133 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
1134 return VINF_SUCCESS;
1135 }
1136 }
1137
1138 /*
1139 * Ok, the page table is present and it's been changed in the guest.
1140 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1141 * We could do this for some flushes in GC too, but we need an algorithm for
1142 * deciding which 4MB pages contain code likely to be executed very soon.
1143 */
1144 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1145 pPdeDst->u = 0;
1146 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
1147 PGM_INVL_BIG_PG(GCPtrPage);
1148 }
1149 }
1150 else
1151 {
1152 /*
1153 * Page directory is not present, mark shadow PDE not present.
1154 */
1155 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1156 {
1157 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1158 pPdeDst->u = 0;
1159 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1160 PGM_INVL_PG(GCPtrPage);
1161 }
1162 else
1163 {
1164 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1165 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
1166 }
1167 }
1168
1169 return rc;
1170
1171#else /* guest real and protected mode */
1172 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1173 return VINF_SUCCESS;
1174#endif
1175}
1176
1177
1178#ifdef PGMPOOL_WITH_USER_TRACKING
1179/**
1180 * Update the tracking of shadowed pages.
1181 *
1182 * @param pVM The VM handle.
1183 * @param pShwPage The shadow page.
1184 * @param HCPhys The physical page that is being dereferenced.
1185 */
1186DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1187{
1188# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1189 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1190 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1191
1192 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1193 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?) -- see the sketch after this function
1194 * 2. write protect all shadowed pages, i.e. implement caching.
1195 */
1196 /*
1197 * Find the guest address.
1198 */
1199 for (PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
1200 pRam;
1201 pRam = CTXALLSUFF(pRam->pNext))
1202 {
1203 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1204 while (iPage-- > 0)
1205 {
1206 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1207 {
1208 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1209 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1210 pShwPage->cPresent--;
1211 pPool->cPresent--;
1212 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1213 return;
1214 }
1215 }
1216 }
1217
1218 for (;;)
1219 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1220# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1221 pShwPage->cPresent--;
1222 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1223# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1224}
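/*
 * A minimal sketch of the HCPhys -> GCPhys TLB suggested by the @todo above: a small
 * direct-mapped hash keyed on the host physical frame number, consulted before falling back
 * to the linear ram range scan. The type, size and function names are hypothetical.
 */
#if 0 /* illustration only, never compiled */
typedef struct PGMTRACKTLBE
{
    RTHCPHYS    HCPhys;         /* Key: host physical address of the page (0 means empty slot). */
    PPGMPAGE    pPage;          /* Value: the corresponding guest page descriptor. */
} PGMTRACKTLBE;

#define PGM_TRACK_TLB_ENTRIES   64  /* Power of two so the hash reduces to a mask. */

static PPGMPAGE pgmBthSketchTrackTlbLookup(PGMTRACKTLBE *paTlb, RTHCPHYS HCPhys)
{
    PGMTRACKTLBE *pTlbe = &paTlb[(HCPhys >> PAGE_SHIFT) & (PGM_TRACK_TLB_ENTRIES - 1)];
    return pTlbe->HCPhys == HCPhys ? pTlbe->pPage : NULL;  /* NULL -> miss, do the linear scan. */
}
#endif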
1225
1226
1227/**
1228 * Update the tracking of shadowed pages.
1229 *
1230 * @param pVM The VM handle.
1231 * @param pShwPage The shadow page.
1232 * @param u16 The top 16 bits of pPage->HCPhys.
1233 * @param pPage Pointer to the guest page. This will be modified.
1234 * @param iPTDst The index into the shadow table.
1235 */
1236DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1237{
1238# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1239 /*
1240 * We're making certain assumptions about the placement of cRef and idx.
1241 */
1242 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1243 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1244
1245 /*
1246 * Just deal with the simple first time here.
1247 */
1248 if (!u16)
1249 {
1250 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1251 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1252 }
1253 else
1254 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1255
1256 /* write back, trying to be clever... */
1257 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%VHp->%VHp iPTDst=%#x\n",
1258 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1259 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1260# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1261
1262 /* update statistics. */
1263 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1264 pShwPage->cPresent++;
1265 if (pShwPage->iFirstPresent > iPTDst)
1266 pShwPage->iFirstPresent = iPTDst;
1267}
1268#endif /* PGMPOOL_WITH_USER_TRACKING */
1269
1270
1271/**
1272 * Creates a 4K shadow page for a guest page.
1273 *
1274 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1275 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1276 * will be mapped in this function.
1277 *
1278 * @param pVM VM handle.
1279 * @param pPteDst Destination page table entry.
1280 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1281 * Can safely assume that only the flags are being used.
1282 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1283 * @param pShwPage Pointer to the shadow page.
1284 * @param iPTDst The index into the shadow table.
1285 *
1286 * @remark Not used for 2/4MB pages!
1287 */
1288DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1289{
1290 if (PteSrc.n.u1Present)
1291 {
1292 /*
1293 * Find the ram range.
1294 */
1295 PPGMPAGE pPage;
1296 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1297 if (VBOX_SUCCESS(rc))
1298 {
1299 /** @todo investigate PWT, PCD and PAT. */
1300 /*
1301 * Make page table entry.
1302 */
1303 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1304 SHWPTE PteDst;
1305 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1306 {
1307 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1308 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1309 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1310 | (HCPhys & X86_PTE_PAE_PG_MASK);
1311 else
1312 {
1313 LogFlow(("SyncPageWorker: monitored page (%VGp) -> mark not present\n", HCPhys));
1314 PteDst.u = 0;
1315 }
1316 /** @todo count these two kinds. */
1317 }
1318 else
1319 {
1320 /*
1321 * If the page or page directory entry is not marked accessed,
1322 * we mark the page not present.
1323 */
1324 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1325 {
1326 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1327 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1328 PteDst.u = 0;
1329 }
1330 else
1331 /*
1332 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1333 * when the page is modified.
1334 */
1335 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1336 {
1337 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1338 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1339 | (HCPhys & X86_PTE_PAE_PG_MASK)
1340 | PGM_PTFLAGS_TRACK_DIRTY;
1341 }
1342 else
1343 {
1344 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1345 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1346 | (HCPhys & X86_PTE_PAE_PG_MASK);
1347 }
1348 }
1349
1350#ifdef PGMPOOL_WITH_USER_TRACKING
1351 /*
1352 * Keep user track up to date.
1353 */
1354 if (PteDst.n.u1Present)
1355 {
1356 if (!pPteDst->n.u1Present)
1357 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1358 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1359 {
1360 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1361 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1362 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1363 }
1364 }
1365 else if (pPteDst->n.u1Present)
1366 {
1367 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1368 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1369 }
1370#endif /* PGMPOOL_WITH_USER_TRACKING */
1371
1372 /*
1373 * Update statistics and commit the entry.
1374 */
1375 if (!PteSrc.n.u1Global)
1376 pShwPage->fSeenNonGlobal = true;
1377 *pPteDst = PteDst;
1378 }
1379 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1380 /** @todo count these. */
1381 }
1382 else
1383 {
1384 /*
1385 * Page not-present.
1386 */
1387 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1388#ifdef PGMPOOL_WITH_USER_TRACKING
1389 /* Keep user track up to date. */
1390 if (pPteDst->n.u1Present)
1391 {
1392 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1393 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1394 }
1395#endif /* PGMPOOL_WITH_USER_TRACKING */
1396 pPteDst->u = 0;
1397 /** @todo count these. */
1398 }
1399}
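/*
 * The dirty-bit tracking decision made above, condensed into a simplified sketch: a writable
 * guest page that is not yet marked dirty is shadowed read-only and tagged with
 * PGM_PTFLAGS_TRACK_DIRTY, so the first write traps and the guest dirty bit can be set then.
 * The helper name is made up for illustration.
 */
#if 0 /* illustration only, never compiled */
static uint64_t pgmBthSketchApplyDirtyTracking(GSTPDE PdeSrc, GSTPTE PteSrc, uint64_t uShwPte)
{
    if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
    {
        uShwPte &= ~(uint64_t)X86_PTE_RW;       /* Write-protect the shadow PTE... */
        uShwPte |= PGM_PTFLAGS_TRACK_DIRTY;     /* ...and remember why, for the #PF path. */
    }
    return uShwPte;
}
#endif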
1400
1401
1402/**
1403 * Syncs a guest OS page.
1404 *
1405 * There are no conflicts at this point, neither is there any need for
1406 * page table allocations.
1407 *
1408 * @returns VBox status code.
1409 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1410 * @param pVM VM handle.
1411 * @param PdeSrc Page directory entry of the guest.
1412 * @param GCPtrPage Guest context page address.
1413 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1414 * @param uErr Fault error (X86_TRAP_PF_*).
1415 */
1416PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1417{
1418 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1419
1420#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1421 || PGM_GST_TYPE == PGM_TYPE_PAE \
1422 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1423 && PGM_SHW_TYPE != PGM_TYPE_NESTED
1424
1425# if PGM_WITH_NX(PGM_GST_TYPE)
1426 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1427# endif
1428
1429 /*
1430 * Assert preconditions.
1431 */
1432 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1433 Assert(PdeSrc.n.u1Present);
1434 Assert(cPages);
1435
1436 /*
1437 * Get the shadow PDE, find the shadow page table in the pool.
1438 */
1439# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1440 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1441 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1442# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1443 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1444 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1445# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1446 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1447 PX86PDPAE pPDDst;
1448 X86PDEPAE PdeDst;
1449
1450 int rc = PGMShwGetAllocLongModePDPtr(pVM, GCPtrPage, &pPDDst);
1451 if (rc != VINF_SUCCESS)
1452 {
1453 AssertMsg(rc == VINF_PGM_SYNC_CR3, ("Unexpected rc=%Vrc\n", rc));
1454 return rc;
1455 }
1456 Assert(pPDDst);
1457 PdeDst = pPDDst->a[iPDDst];
1458# endif
1459 Assert(PdeDst.n.u1Present);
1460 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1461
1462 /*
1463 * Check that the page is present and that the shadow PDE isn't out of sync.
1464 */
1465 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1466 RTGCPHYS GCPhys;
1467 if (!fBigPage)
1468 {
1469 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1470# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1471 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1472 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1473# endif
1474 }
1475 else
1476 {
1477 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1478# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1479 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1480 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1481# endif
1482 }
1483 if ( pShwPage->GCPhys == GCPhys
1484 && PdeSrc.n.u1Present
1485 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1486 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1487# if PGM_WITH_NX(PGM_GST_TYPE)
1488 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1489# endif
1490 )
1491 {
1492 /*
1493 * Check that the PDE is marked accessed already.
1494 * Since we set the accessed bit *before* getting here on a #PF, this
1495 * check is only meant for dealing with non-#PF'ing paths.
1496 */
1497 if (PdeSrc.n.u1Accessed)
1498 {
1499 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1500 if (!fBigPage)
1501 {
1502 /*
1503 * 4KB Page - Map the guest page table.
1504 */
1505 PGSTPT pPTSrc;
1506 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1507 if (VBOX_SUCCESS(rc))
1508 {
1509# ifdef PGM_SYNC_N_PAGES
1510 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1511 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1512 {
1513 /*
1514 * This code path is currently only taken when the caller is PGMTrap0eHandler
1515 * for non-present pages!
1516 *
1517 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1518 * deal with locality.
1519 */
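 /* Editor's note: an illustrative sketch (not part of this template) of the
 * PGM_SYNC_NR_PAGES windowing applied below. The helper name and parameters
 * (pgmSketchSyncWindow, cPTEntries) are hypothetical; the real code inlines
 * the same arithmetic using RT_MIN and ELEMENTS(). */
#if 0 /* illustrative only */
/* Compute the [iFirst, iEnd) window of PT entries to sync around the faulting
 * index, clamped to the bounds of the shadow page table. */
static void pgmSketchSyncWindow(unsigned iFault, unsigned cSyncPages, unsigned cPTEntries,
                                unsigned *piFirst, unsigned *piEnd)
{
    unsigned iEnd = iFault + cSyncPages / 2;             /* end relative to the fault */
    if (iEnd > cPTEntries)
        iEnd = cPTEntries;                               /* don't run off the table   */
    *piFirst = iFault >= cSyncPages / 2 ? iFault - cSyncPages / 2 : 0;
    *piEnd   = iEnd;
}
#endif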
1520 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1521# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1522 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1523 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1524# else
1525 const unsigned offPTSrc = 0;
1526# endif
1527 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1528 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1529 iPTDst = 0;
1530 else
1531 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1532 for (; iPTDst < iPTDstEnd; iPTDst++)
1533 {
1534 if (!pPTDst->a[iPTDst].n.u1Present)
1535 {
1536 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1537 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1538 NOREF(GCPtrCurPage);
1539#ifndef IN_RING0
1540 /*
 1541 * Assume kernel code is marked as supervisor (and not as user level code executed
 1542 * via a conforming code selector) and as read-only.
1543 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1544 */
1545 PPGMPAGE pPage;
1546 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1547 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1548 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1549 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1550 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1551 )
1552#endif /* else: CSAM not active */
1553 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1554 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1555 GCPtrCurPage, PteSrc.n.u1Present,
1556 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1557 PteSrc.n.u1User & PdeSrc.n.u1User,
1558 (uint64_t)PteSrc.u,
1559 (uint64_t)pPTDst->a[iPTDst].u,
1560 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1561 }
1562 }
1563 }
1564 else
1565# endif /* PGM_SYNC_N_PAGES */
1566 {
1567 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1568 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1569 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1570 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1571 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1572 GCPtrPage, PteSrc.n.u1Present,
1573 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1574 PteSrc.n.u1User & PdeSrc.n.u1User,
1575 (uint64_t)PteSrc.u,
1576 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1577 }
1578 }
1579 else /* MMIO or invalid page: emulated in #PF handler. */
1580 {
1581 LogFlow(("PGM_GCPHYS_2_PTR %VGp failed with %Vrc\n", GCPhys, rc));
1582 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1583 }
1584 }
1585 else
1586 {
1587 /*
1588 * 4/2MB page - lazy syncing shadow 4K pages.
 1589 * (There are many ways of getting here; it's no longer only CSAM.)
1590 */
1591 /* Calculate the GC physical address of this 4KB shadow page. */
1592 RTGCPHYS GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1593 /* Find ram range. */
1594 PPGMPAGE pPage;
1595 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1596 if (VBOX_SUCCESS(rc))
1597 {
1598 /*
1599 * Make shadow PTE entry.
1600 */
1601 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1602 SHWPTE PteDst;
1603 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1604 | (HCPhys & X86_PTE_PAE_PG_MASK);
1605 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1606 {
1607 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1608 PteDst.n.u1Write = 0;
1609 else
1610 PteDst.u = 0;
1611 }
1612 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1613# ifdef PGMPOOL_WITH_USER_TRACKING
1614 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1615 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1616# endif
1617 pPTDst->a[iPTDst] = PteDst;
1618
1619
1620 /*
1621 * If the page is not flagged as dirty and is writable, then make it read-only
1622 * at PD level, so we can set the dirty bit when the page is modified.
1623 *
1624 * ASSUMES that page access handlers are implemented on page table entry level.
1625 * Thus we will first catch the dirty access and set PDE.D and restart. If
1626 * there is an access handler, we'll trap again and let it work on the problem.
1627 */
1628 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1629 * As for invlpg, it simply frees the whole shadow PT.
1630 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
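 /* Editor's note: a minimal sketch (not part of this template) of the
 * write-protect-to-track-dirty idea described above, assuming a raw x86 PDE
 * where bit 1 is R/W and one of the software-available bits (9-11) is used as
 * the tracking marker. The SKETCH_* constants and helper name are hypothetical;
 * the real code uses PGM_PDFLAGS_TRACK_DIRTY on the shadow PDE. */
#if 0 /* illustrative only */
#include <stdint.h>

#define SKETCH_PDE_RW          UINT64_C(0x002)   /* x86 PDE bit 1: writable              */
#define SKETCH_PDE_TRACK_DIRTY UINT64_C(0x200)   /* x86 PDE bit 9: available to software */

/* If the guest big page is writable but not yet dirty, shadow it read-only and
 * mark why, so the resulting write fault can set the guest D bit and restore R/W. */
static uint64_t pgmSketchArmDirtyTracking(uint64_t uShwPde, int fGstWritable, int fGstDirty)
{
    if (fGstWritable && !fGstDirty)
        return (uShwPde & ~SKETCH_PDE_RW) | SKETCH_PDE_TRACK_DIRTY;
    uShwPde &= ~(SKETCH_PDE_RW | SKETCH_PDE_TRACK_DIRTY);   /* dirty, or read-only anyway */
    return fGstWritable ? uShwPde | SKETCH_PDE_RW : uShwPde;
}
#endif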
1631 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1632 {
1633 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1634 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1635 PdeDst.n.u1Write = 0;
1636 }
1637 else
1638 {
1639 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1640 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1641 }
1642# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1643 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1644# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1645 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1646# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1647 pPDDst->a[iPDDst] = PdeDst;
1648# endif
1649 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1650 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1651 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1652 }
1653 else
1654 LogFlow(("PGM_GCPHYS_2_PTR %VGp (big) failed with %Vrc\n", GCPhys, rc));
1655 }
1656 return VINF_SUCCESS;
1657 }
1658 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1659 }
1660 else
1661 {
1662 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1663 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1664 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1665 }
1666
1667 /*
1668 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1669 * Yea, I'm lazy.
1670 */
1671 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1672# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1673 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1674# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1675 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1676# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1677 pPDDst->a[iPDDst].u = 0;
1678# endif
1679 PGM_INVL_GUEST_TLBS();
1680 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1681
1682#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1683 && PGM_SHW_TYPE != PGM_TYPE_NESTED
1684
1685# ifdef PGM_SYNC_N_PAGES
1686 /*
1687 * Get the shadow PDE, find the shadow page table in the pool.
1688 */
1689 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1690# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1691 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1692# else /* PAE */
1693 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1694# endif
1695 Assert(PdeDst.n.u1Present);
1696 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1697 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1698
1699# if PGM_SHW_TYPE == PGM_TYPE_PAE
1700 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1701 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1702# else
1703 const unsigned offPTSrc = 0;
1704# endif
1705
1706 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1707 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1708 {
1709 /*
1710 * This code path is currently only taken when the caller is PGMTrap0eHandler
1711 * for non-present pages!
1712 *
1713 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1714 * deal with locality.
1715 */
1716 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1717 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1718 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1719 iPTDst = 0;
1720 else
1721 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1722 for (; iPTDst < iPTDstEnd; iPTDst++)
1723 {
1724 if (!pPTDst->a[iPTDst].n.u1Present)
1725 {
1726 GSTPTE PteSrc;
1727
1728 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1729
1730 /* Fake the page table entry */
1731 PteSrc.u = GCPtrCurPage;
1732 PteSrc.n.u1Present = 1;
1733 PteSrc.n.u1Dirty = 1;
1734 PteSrc.n.u1Accessed = 1;
1735 PteSrc.n.u1Write = 1;
1736 PteSrc.n.u1User = 1;
1737
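 /* Editor's note: an illustrative sketch (not part of this template) of the
 * faked, identity-mapped guest PTE built above for unpaged (real/protected
 * mode) guests: the "guest" frame is simply the page-aligned virtual address,
 * with full access and A/D already set. The helper name is hypothetical. */
#if 0 /* illustrative only */
#include <stdint.h>

/* Build an identity-mapped x86 PTE: bit 0=P, 1=R/W, 2=U/S, 5=A, 6=D. */
static uint64_t pgmSketchFakeIdentityPte(uint64_t GCPtrPage)
{
    return (GCPtrPage & ~UINT64_C(0xFFF))    /* frame == 4K-aligned virtual address */
         | UINT64_C(0x01)                    /* present  */
         | UINT64_C(0x02)                    /* writable */
         | UINT64_C(0x04)                    /* user     */
         | UINT64_C(0x20)                    /* accessed */
         | UINT64_C(0x40);                   /* dirty    */
}
#endif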
1738 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1739
1740 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1741 GCPtrCurPage, PteSrc.n.u1Present,
1742 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1743 PteSrc.n.u1User & PdeSrc.n.u1User,
1744 (uint64_t)PteSrc.u,
1745 (uint64_t)pPTDst->a[iPTDst].u,
1746 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1747 }
1748 }
1749 }
1750 else
1751# endif /* PGM_SYNC_N_PAGES */
1752 {
1753 GSTPTE PteSrc;
1754 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1755 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1756
1757 /* Fake the page table entry */
1758 PteSrc.u = GCPtrCurPage;
1759 PteSrc.n.u1Present = 1;
1760 PteSrc.n.u1Dirty = 1;
1761 PteSrc.n.u1Accessed = 1;
1762 PteSrc.n.u1Write = 1;
1763 PteSrc.n.u1User = 1;
1764 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1765
1766 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
1767 GCPtrPage, PteSrc.n.u1Present,
1768 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1769 PteSrc.n.u1User & PdeSrc.n.u1User,
1770 (uint64_t)PteSrc.u,
1771 (uint64_t)pPTDst->a[iPTDst].u,
1772 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1773 }
1774 return VINF_SUCCESS;
1775
1776#else
 1777 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1778 return VERR_INTERNAL_ERROR;
1779#endif
1780}
1781
1782
1783
1784#if PGM_WITH_PAGING(PGM_GST_TYPE)
1785
1786/**
1787 * Investigate page fault and handle write protection page faults caused by
1788 * dirty bit tracking.
1789 *
1790 * @returns VBox status code.
1791 * @param pVM VM handle.
1792 * @param uErr Page fault error code.
1793 * @param pPdeDst Shadow page directory entry.
1794 * @param pPdeSrc Guest page directory entry.
1795 * @param GCPtrPage Guest context page address.
1796 */
1797PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1798{
1799 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
1800 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
1801 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
1802 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1803# if PGM_WITH_NX(PGM_GST_TYPE)
1804 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1805# endif
1806 unsigned uPageFaultLevel;
1807 int rc;
1808
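 /* Editor's note: for reference, a sketch (not part of this template) of the
 * hardware-defined x86 #PF error code bits that the flags above are derived
 * from; the helper name is hypothetical and only illustrates the decoding.
 *   bit 0 (P)    - 0: not-present fault, 1: protection violation
 *   bit 1 (W/R)  - 1: the access was a write
 *   bit 2 (U/S)  - 1: the access originated in user mode (CPL 3)
 *   bit 3 (RSVD) - 1: a reserved bit was set in a paging structure entry
 *   bit 4 (I/D)  - 1: the fault was an instruction fetch (reported with NX enabled) */
#if 0 /* illustrative only */
#include <stdint.h>

static int pgmSketchIsUserWriteFault(uint32_t uErr)
{
    return (uErr & 0x2) && (uErr & 0x4);     /* write fault taken in user mode */
}
#endif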
1809 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1810 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1811
1812# if PGM_GST_TYPE == PGM_TYPE_PAE \
1813 || PGM_GST_TYPE == PGM_TYPE_AMD64
1814
1815# if PGM_GST_TYPE == PGM_TYPE_AMD64
1816 PX86PML4E pPml4eSrc;
1817 PX86PDPE pPdpeSrc;
1818
1819 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
1820 Assert(pPml4eSrc);
1821
1822 /*
1823 * Real page fault? (PML4E level)
1824 */
1825 if ( (uErr & X86_TRAP_PF_RSVD)
1826 || !pPml4eSrc->n.u1Present
1827 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
1828 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1829 || (fUserLevelFault && !pPml4eSrc->n.u1User)
1830 )
1831 {
1832 uPageFaultLevel = 0;
1833 goto UpperLevelPageFault;
1834 }
1835 Assert(pPdpeSrc);
1836
1837# else /* PAE */
1838 PX86PDPE pPdpeSrc = &pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtrPage >> GST_PDPT_SHIFT) & GST_PDPT_MASK];
1839# endif
1840
1841 /*
1842 * Real page fault? (PDPE level)
1843 */
1844 if ( (uErr & X86_TRAP_PF_RSVD)
1845 || !pPdpeSrc->n.u1Present
1846# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
1847 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
1848 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
1849 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
1850# endif
1851 )
1852 {
1853 uPageFaultLevel = 1;
1854 goto UpperLevelPageFault;
1855 }
1856# endif
1857
1858 /*
1859 * Real page fault? (PDE level)
1860 */
1861 if ( (uErr & X86_TRAP_PF_RSVD)
1862 || !pPdeSrc->n.u1Present
1863# if PGM_WITH_NX(PGM_GST_TYPE)
1864 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
1865# endif
1866 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1867 || (fUserLevelFault && !pPdeSrc->n.u1User) )
1868 {
1869 uPageFaultLevel = 2;
1870 goto UpperLevelPageFault;
1871 }
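 /* Editor's note: a condensed sketch (not part of this template) of the
 * per-level check performed above: a fault is a genuine guest fault when the
 * entry is not present or the access violates its NX/RW/US permissions, with
 * supervisor writes honouring CR0.WP. The RSVD case (uErr & X86_TRAP_PF_RSVD)
 * is left out for brevity; the predicate name and flag parameters are hypothetical. */
#if 0 /* illustrative only */
static int pgmSketchIsRealGuestFault(int fPresent, int fEntryWrite, int fEntryUser, int fEntryNoExec,
                                     int fWriteFault, int fUserFault, int fInstrFetch,
                                     int fWriteProtect /* CR0.WP */, int fNxValid /* EFER.NXE */)
{
    if (!fPresent)
        return 1;
    if (fNxValid && fInstrFetch && fEntryNoExec)
        return 1;
    if (fWriteFault && !fEntryWrite && (fUserFault || fWriteProtect))
        return 1;   /* supervisor writes to R/O pages only fault when CR0.WP is set */
    if (fUserFault && !fEntryUser)
        return 1;
    return 0;
}
#endif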
1872
1873 /*
1874 * First check the easy case where the page directory has been marked read-only to track
1875 * the dirty bit of an emulated BIG page
1876 */
1877 if (pPdeSrc->b.u1Size && fBigPagesSupported)
1878 {
1879 /* Mark guest page directory as accessed */
1880# if PGM_GST_TYPE == PGM_TYPE_AMD64
1881 pPml4eSrc->n.u1Accessed = 1;
1882 pPdpeSrc->lm.u1Accessed = 1;
1883# endif
1884 pPdeSrc->b.u1Accessed = 1;
1885
1886 /*
1887 * Only write protection page faults are relevant here.
1888 */
1889 if (fWriteFault)
1890 {
1891 /* Mark guest page directory as dirty (BIG page only). */
1892 pPdeSrc->b.u1Dirty = 1;
1893
1894 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1895 {
1896 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1897
1898 Assert(pPdeSrc->b.u1Write);
1899
1900 pPdeDst->n.u1Write = 1;
1901 pPdeDst->n.u1Accessed = 1;
1902 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1903 PGM_INVL_BIG_PG(GCPtrPage);
1904 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1905 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
1906 }
1907 }
1908 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1909 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
1910 }
1911 /* else: 4KB page table */
1912
1913 /*
1914 * Map the guest page table.
1915 */
1916 PGSTPT pPTSrc;
1917 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
1918 if (VBOX_SUCCESS(rc))
1919 {
1920 /*
1921 * Real page fault?
1922 */
1923 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
1924 const GSTPTE PteSrc = *pPteSrc;
1925 if ( !PteSrc.n.u1Present
1926# if PGM_WITH_NX(PGM_GST_TYPE)
1927 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
1928# endif
1929 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
1930 || (fUserLevelFault && !PteSrc.n.u1User)
1931 )
1932 {
1933# ifdef IN_GC
1934 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
1935# endif
1936 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1937 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
1938
1939 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
1940 * See the 2nd case above as well.
1941 */
1942 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
1943 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
1944
1945 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1946 return VINF_EM_RAW_GUEST_TRAP;
1947 }
1948 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
1949
1950 /*
1951 * Set the accessed bits in the page directory and the page table.
1952 */
1953# if PGM_GST_TYPE == PGM_TYPE_AMD64
1954 pPml4eSrc->n.u1Accessed = 1;
1955 pPdpeSrc->lm.u1Accessed = 1;
1956# endif
1957 pPdeSrc->n.u1Accessed = 1;
1958 pPteSrc->n.u1Accessed = 1;
1959
1960 /*
1961 * Only write protection page faults are relevant here.
1962 */
1963 if (fWriteFault)
1964 {
1965 /* Write access, so mark guest entry as dirty. */
1966# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
1967 if (!pPteSrc->n.u1Dirty)
1968 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
1969 else
1970 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
1971# endif
1972
1973 pPteSrc->n.u1Dirty = 1;
1974
1975 if (pPdeDst->n.u1Present)
1976 {
1977 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
1978 * Our individual shadow handlers will provide more information and force a fatal exit.
1979 */
1980 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
1981 {
1982 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
1983 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1984 return VINF_SUCCESS;
1985 }
1986
1987 /*
1988 * Map shadow page table.
1989 */
1990 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
1991 if (pShwPage)
1992 {
1993 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1994 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
1995 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
1996 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
1997 {
1998 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
1999# ifdef VBOX_STRICT
2000 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2001 if (pPage)
2002 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2003 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2004# endif
2005 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
2006
2007 Assert(pPteSrc->n.u1Write);
2008
2009 pPteDst->n.u1Write = 1;
2010 pPteDst->n.u1Dirty = 1;
2011 pPteDst->n.u1Accessed = 1;
2012 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2013 PGM_INVL_PG(GCPtrPage);
2014
2015 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2016 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2017 }
2018 }
2019 else
2020 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2021 }
2022 }
2023/** @todo Optimize accessed bit emulation? */
2024# ifdef VBOX_STRICT
2025 /*
2026 * Sanity check.
2027 */
2028 else if ( !pPteSrc->n.u1Dirty
2029 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2030 && pPdeDst->n.u1Present)
2031 {
2032 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2033 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2034 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2035 if ( pPteDst->n.u1Present
2036 && pPteDst->n.u1Write)
2037 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2038 }
2039# endif /* VBOX_STRICT */
2040 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2041 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2042 }
2043 AssertRC(rc);
2044 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2045 return rc;
2046
2047
2048UpperLevelPageFault:
 2049 /* Page fault detected while checking the PML4E, PDPE or PDE.
2050 * Single exit handler to get rid of duplicate code paths.
2051 */
2052# ifdef IN_GC
2053 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
2054# endif
2055 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
2056 LogFlow(("CheckPageFault: real page fault at %VGv (%d)\n", GCPtrPage, uPageFaultLevel));
2057
2058 if (
2059# if PGM_GST_TYPE == PGM_TYPE_AMD64
2060 pPml4eSrc->n.u1Present &&
2061# endif
2062# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2063 pPdpeSrc->n.u1Present &&
2064# endif
2065 pPdeSrc->n.u1Present)
2066 {
2067 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2068 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2069 {
2070 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2071 }
2072 else
2073 {
2074 /*
2075 * Map the guest page table.
2076 */
2077 PGSTPT pPTSrc;
2078 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2079 if (VBOX_SUCCESS(rc))
2080 {
2081 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2082 const GSTPTE PteSrc = *pPteSrc;
2083 if (pPteSrc->n.u1Present)
2084 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2085 }
2086 AssertRC(rc);
2087 }
2088 }
2089 return VINF_EM_RAW_GUEST_TRAP;
2090}
2091
2092#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2093
2094
2095/**
2096 * Sync a shadow page table.
2097 *
2098 * The shadow page table is not present. This includes the case where
2099 * there is a conflict with a mapping.
2100 *
2101 * @returns VBox status code.
2102 * @param pVM VM handle.
 2103 * @param iPDSrc Page directory index.
2104 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2105 * Assume this is a temporary mapping.
2106 * @param GCPtrPage GC Pointer of the page that caused the fault
2107 */
2108PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage)
2109{
2110 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2111 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
2112 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
2113
2114#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2115 || PGM_GST_TYPE == PGM_TYPE_PAE \
2116 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2117 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2118
2119 int rc = VINF_SUCCESS;
2120
2121 /*
2122 * Validate input a little bit.
2123 */
2124 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%VGv\n", iPDSrc, GCPtrPage));
2125# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2126 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2127 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2128# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2129 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2130 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2131# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2132 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2133 PX86PDPAE pPDDst;
2134 rc = PGMShwGetAllocLongModePDPtr(pVM, GCPtrPage, &pPDDst);
2135 if (rc != VINF_SUCCESS)
2136 {
2137 AssertMsg(rc == VINF_PGM_SYNC_CR3, ("Unexpected rc=%Vrc\n", rc));
2138 return rc;
2139 }
2140 Assert(pPDDst);
2141# endif
2142 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2143 SHWPDE PdeDst = *pPdeDst;
2144
2145# ifndef PGM_WITHOUT_MAPPINGS
2146 /*
2147 * Check for conflicts.
2148 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2149 * HC: Simply resolve the conflict.
2150 */
2151 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2152 {
2153 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2154# ifndef IN_RING3
2155 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
2156 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2157 return VERR_ADDRESS_CONFLICT;
2158# else
2159 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2160 Assert(pMapping);
2161# if PGM_GST_TYPE == PGM_TYPE_32BIT
2162 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2163# elif PGM_GST_TYPE == PGM_TYPE_PAE
2164 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2165# else
2166 AssertFailed(); /* can't happen for amd64 */
2167# endif
2168 if (VBOX_FAILURE(rc))
2169 {
2170 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2171 return rc;
2172 }
2173 PdeDst = *pPdeDst;
2174# endif
2175 }
2176# else /* PGM_WITHOUT_MAPPINGS */
2177 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2178# endif /* PGM_WITHOUT_MAPPINGS */
2179 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2180
2181 /*
2182 * Sync page directory entry.
2183 */
2184 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2185 if (PdeSrc.n.u1Present)
2186 {
2187 /*
2188 * Allocate & map the page table.
2189 */
2190 PSHWPT pPTDst;
2191 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2192 PPGMPOOLPAGE pShwPage;
2193 RTGCPHYS GCPhys;
2194 if (fPageTable)
2195 {
2196 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2197# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2198 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2199 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2200# endif
2201 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2202 }
2203 else
2204 {
2205 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2206# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
 2207 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2208 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2209# endif
2210 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2211 }
2212 if (rc == VINF_SUCCESS)
2213 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2214 else if (rc == VINF_PGM_CACHED_PAGE)
2215 {
2216 /*
2217 * The PT was cached, just hook it up.
2218 */
2219 if (fPageTable)
2220 PdeDst.u = pShwPage->Core.Key
2221 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2222 else
2223 {
2224 PdeDst.u = pShwPage->Core.Key
2225 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2226 /* (see explanation and assumptions further down.) */
2227 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2228 {
2229 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2230 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2231 PdeDst.b.u1Write = 0;
2232 }
2233 }
2234 *pPdeDst = PdeDst;
2235 return VINF_SUCCESS;
2236 }
2237 else if (rc == VERR_PGM_POOL_FLUSHED)
2238 return VINF_PGM_SYNC_CR3;
2239 else
2240 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2241 PdeDst.u &= X86_PDE_AVL_MASK;
2242 PdeDst.u |= pShwPage->Core.Key;
2243
2244 /*
2245 * Page directory has been accessed (this is a fault situation, remember).
2246 */
2247 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2248 if (fPageTable)
2249 {
2250 /*
2251 * Page table - 4KB.
2252 *
2253 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2254 */
2255 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2256 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2257 PGSTPT pPTSrc;
2258 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2259 if (VBOX_SUCCESS(rc))
2260 {
2261 /*
2262 * Start by syncing the page directory entry so CSAM's TLB trick works.
2263 */
2264 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2265 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2266 *pPdeDst = PdeDst;
2267
2268 /*
2269 * Directory/page user or supervisor privilege: (same goes for read/write)
2270 *
2271 * Directory Page Combined
2272 * U/S U/S U/S
2273 * 0 0 0
2274 * 0 1 0
2275 * 1 0 0
2276 * 1 1 1
2277 *
2278 * Simple AND operation. Table listed for completeness.
2279 *
2280 */
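 /* Editor's note: an illustrative sketch (not part of this template) of the
 * table above: the effective U/S and R/W permission of a 4K mapping is the
 * bitwise AND of the PDE and PTE bits (CR0.WP handling aside). The helper
 * name is hypothetical. */
#if 0 /* illustrative only */
#include <stdint.h>

static void pgmSketchCombineAccess(uint64_t uPde, uint64_t uPte, int *pfUser, int *pfWrite)
{
    *pfUser  = !!(uPde & uPte & UINT64_C(0x4));   /* x86 bit 2: U/S */
    *pfWrite = !!(uPde & uPte & UINT64_C(0x2));   /* x86 bit 1: R/W */
}
#endif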
2281 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
2282# ifdef PGM_SYNC_N_PAGES
2283 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2284 unsigned iPTDst = iPTBase;
2285 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
2286 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2287 iPTDst = 0;
2288 else
2289 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2290# else /* !PGM_SYNC_N_PAGES */
2291 unsigned iPTDst = 0;
2292 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2293# endif /* !PGM_SYNC_N_PAGES */
2294# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2295 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2296 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2297# else
2298 const unsigned offPTSrc = 0;
2299# endif
2300 for (; iPTDst < iPTDstEnd; iPTDst++)
2301 {
2302 const unsigned iPTSrc = iPTDst + offPTSrc;
2303 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2304
2305 if (PteSrc.n.u1Present) /* we've already cleared it above */
2306 {
2307# ifndef IN_RING0
2308 /*
 2309 * Assume kernel code is marked as supervisor (and not as user level code executed
 2310 * via a conforming code selector) and as read-only.
2311 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2312 */
2313 PPGMPAGE pPage;
2314 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2315 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2316 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2317 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2318 )
2319# endif
2320 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2321 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2322 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2323 PteSrc.n.u1Present,
2324 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2325 PteSrc.n.u1User & PdeSrc.n.u1User,
2326 (uint64_t)PteSrc.u,
2327 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2328 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2329 }
2330 } /* for PTEs */
2331 }
2332 }
2333 else
2334 {
2335 /*
2336 * Big page - 2/4MB.
2337 *
2338 * We'll walk the ram range list in parallel and optimize lookups.
2339 * We will only sync on shadow page table at a time.
2340 */
2341 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2342
2343 /**
2344 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2345 */
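 /* Editor's note: an illustrative sketch (not part of this template) of the
 * "walk the ram range list in parallel" pattern used below, assuming a
 * hypothetical singly-linked list of ranges sorted by start address (the
 * SKETCHRANGE type and helper name are made up for illustration). */
#if 0 /* illustrative only */
#include <stdint.h>
#include <stddef.h>

typedef struct SKETCHRANGE
{
    uint64_t            GCPhys;        /* first byte covered */
    uint64_t            GCPhysLast;    /* last byte covered  */
    struct SKETCHRANGE *pNext;         /* sorted by GCPhys   */
} SKETCHRANGE;

/* Advance the cursor to the first range that could still contain GCPhys. Because
 * both the range list and the addresses visited are monotonically increasing,
 * each range is skipped at most once over the whole 2/4MB fill loop. */
static SKETCHRANGE *pgmSketchAdvanceRange(SKETCHRANGE *pCur, uint64_t GCPhys)
{
    while (pCur && GCPhys > pCur->GCPhysLast)
        pCur = pCur->pNext;
    return pCur;   /* NULL, or a range with GCPhysLast >= GCPhys; caller still checks GCPhys >= pCur->GCPhys */
}
#endif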
2346
2347 /*
2348 * Start by syncing the page directory entry.
2349 */
2350 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2351 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2352
2353 /*
2354 * If the page is not flagged as dirty and is writable, then make it read-only
2355 * at PD level, so we can set the dirty bit when the page is modified.
2356 *
2357 * ASSUMES that page access handlers are implemented on page table entry level.
2358 * Thus we will first catch the dirty access and set PDE.D and restart. If
2359 * there is an access handler, we'll trap again and let it work on the problem.
2360 */
2361 /** @todo move the above stuff to a section in the PGM documentation. */
2362 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2363 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2364 {
2365 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2366 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2367 PdeDst.b.u1Write = 0;
2368 }
2369 *pPdeDst = PdeDst;
2370
2371 /*
2372 * Fill the shadow page table.
2373 */
2374 /* Get address and flags from the source PDE. */
2375 SHWPTE PteDstBase;
2376 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2377
2378 /* Loop thru the entries in the shadow PT. */
2379 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2380 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2381 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2382 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2383 PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
2384 unsigned iPTDst = 0;
2385 while (iPTDst < ELEMENTS(pPTDst->a))
2386 {
2387 /* Advance ram range list. */
2388 while (pRam && GCPhys > pRam->GCPhysLast)
2389 pRam = CTXALLSUFF(pRam->pNext);
2390 if (pRam && GCPhys >= pRam->GCPhys)
2391 {
2392 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2393 do
2394 {
2395 /* Make shadow PTE. */
2396 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2397 SHWPTE PteDst;
2398
2399 /* Make sure the RAM has already been allocated. */
2400 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2401 {
2402 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2403 {
2404# ifdef IN_RING3
2405 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2406# else
2407 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2408# endif
2409 if (rc != VINF_SUCCESS)
2410 return rc;
2411 }
2412 }
2413
2414 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2415 {
2416 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2417 {
2418 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2419 PteDst.n.u1Write = 0;
2420 }
2421 else
2422 PteDst.u = 0;
2423 }
2424# ifndef IN_RING0
2425 /*
 2426 * Assume kernel code is marked as supervisor and not as user level code executed
 2427 * via a conforming code selector. Don't check for read-only, as that would require the
 2428 * whole 4MB to be code or read-only data; Linux enables write access for its large pages.
2429 */
2430 else if ( !PdeSrc.n.u1User
2431 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2432 PteDst.u = 0;
2433# endif
2434 else
2435 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2436# ifdef PGMPOOL_WITH_USER_TRACKING
2437 if (PteDst.n.u1Present)
2438 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2439# endif
2440 /* commit it */
2441 pPTDst->a[iPTDst] = PteDst;
2442 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2443 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2444 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2445
2446 /* advance */
2447 GCPhys += PAGE_SIZE;
2448 iHCPage++;
2449 iPTDst++;
2450 } while ( iPTDst < ELEMENTS(pPTDst->a)
2451 && GCPhys <= pRam->GCPhysLast);
2452 }
2453 else if (pRam)
2454 {
2455 Log(("Invalid pages at %VGp\n", GCPhys));
2456 do
2457 {
2458 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2459 GCPhys += PAGE_SIZE;
2460 iPTDst++;
2461 } while ( iPTDst < ELEMENTS(pPTDst->a)
2462 && GCPhys < pRam->GCPhys);
2463 }
2464 else
2465 {
2466 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2467 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2468 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2469 }
2470 } /* while more PTEs */
2471 } /* 4KB / 4MB */
2472 }
2473 else
2474 AssertRelease(!PdeDst.n.u1Present);
2475
2476 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2477# ifdef IN_GC
2478 if (VBOX_FAILURE(rc))
2479 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2480# endif
2481 return rc;
2482
2483#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2484 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2485
2486 int rc = VINF_SUCCESS;
2487
2488 /*
2489 * Validate input a little bit.
2490 */
2491# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2492 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2493# else
2494 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2495# endif
2496 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2497 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2498 SHWPDE PdeDst = *pPdeDst;
2499
2500 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2501 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2502
2503 GSTPDE PdeSrc;
2504 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2505 PdeSrc.n.u1Present = 1;
2506 PdeSrc.n.u1Write = 1;
2507 PdeSrc.n.u1Accessed = 1;
2508 PdeSrc.n.u1User = 1;
2509
2510 /*
2511 * Allocate & map the page table.
2512 */
2513 PSHWPT pPTDst;
2514 PPGMPOOLPAGE pShwPage;
2515 RTGCPHYS GCPhys;
2516
2517 /* Virtual address = physical address */
2518 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK_32;
2519 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2520
2521 if ( rc == VINF_SUCCESS
2522 || rc == VINF_PGM_CACHED_PAGE)
2523 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2524 else
2525 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2526
2527 PdeDst.u &= X86_PDE_AVL_MASK;
2528 PdeDst.u |= pShwPage->Core.Key;
2529 PdeDst.n.u1Present = 1;
2530 PdeDst.n.u1Write = 1;
2531 PdeDst.n.u1User = 1;
2532 *pPdeDst = PdeDst;
2533
2534 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2535 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2536 return rc;
2537
2538#else
 2539 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2540 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2541 return VERR_INTERNAL_ERROR;
2542#endif
2543}
2544
2545
2546
2547/**
2548 * Prefetch a page/set of pages.
2549 *
2550 * Typically used to sync commonly used pages before entering raw mode
2551 * after a CR3 reload.
2552 *
2553 * @returns VBox status code.
2554 * @param pVM VM handle.
 2555 * @param GCPtrPage Page to prefetch.
2556 */
2557PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2558{
2559#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2560 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2561 /*
2562 * Check that all Guest levels thru the PDE are present, getting the
2563 * PD and PDE in the processes.
2564 */
2565 int rc = VINF_SUCCESS;
2566# if PGM_WITH_PAGING(PGM_GST_TYPE)
2567# if PGM_GST_TYPE == PGM_TYPE_32BIT
2568 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2569 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2570# elif PGM_GST_TYPE == PGM_TYPE_PAE
2571 unsigned iPDSrc;
2572 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2573 if (!pPDSrc)
2574 return VINF_SUCCESS; /* not present */
2575# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2576 unsigned iPDSrc;
2577 PX86PML4E pPml4e;
2578 X86PDPE Pdpe;
2579 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4e, &Pdpe, &iPDSrc);
2580 if (!pPDSrc)
2581 return VINF_SUCCESS; /* not present */
2582# endif
2583 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2584# else
2585 PGSTPD pPDSrc = NULL;
2586 const unsigned iPDSrc = 0;
2587 GSTPDE PdeSrc;
2588
2589 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2590 PdeSrc.n.u1Present = 1;
2591 PdeSrc.n.u1Write = 1;
2592 PdeSrc.n.u1Accessed = 1;
2593 PdeSrc.n.u1User = 1;
2594# endif
2595
2596 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2597 {
2598# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2599 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2600# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2601 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2602# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2603 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2604 PX86PDPAE pPDDst;
2605 PX86PDPT pPdptDst;
2606 X86PDEPAE PdeDst;
2607
2608 int rc = PGMShwGetLongModePDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2609 if (rc != VINF_SUCCESS)
2610 {
2611 AssertMsg(rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc=%Vrc\n", rc));
2612 return rc;
2613 }
2614 Assert(pPDDst);
2615 PdeDst = pPDDst->a[iPDDst];
2616# endif
2617 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2618 {
2619 if (!PdeDst.n.u1Present)
2620 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2621 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2622 else
2623 {
2624 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2625 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2626 * makes no sense to prefetch more than one page.
2627 */
2628 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2629 if (VBOX_SUCCESS(rc))
2630 rc = VINF_SUCCESS;
2631 }
2632 }
2633 }
2634 return rc;
2635#elif PGM_SHW_TYPE == PGM_TYPE_NESTED
2636 return VINF_SUCCESS; /* ignore */
2637#endif
2638}
2639
2640
2641
2642
2643/**
2644 * Syncs a page during a PGMVerifyAccess() call.
2645 *
2646 * @returns VBox status code (informational included).
2647 * @param GCPtrPage The address of the page to sync.
2648 * @param fPage The effective guest page flags.
2649 * @param uErr The trap error code.
2650 */
2651PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2652{
2653 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2654
2655 Assert(!HWACCMIsNestedPagingActive(pVM));
2656#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2657 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2658
2659# ifndef IN_RING0
2660 if (!(fPage & X86_PTE_US))
2661 {
2662 /*
2663 * Mark this page as safe.
2664 */
2665 /** @todo not correct for pages that contain both code and data!! */
2666 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2667 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
2668 }
2669# endif
2670 /*
2671 * Get guest PD and index.
2672 */
2673
2674# if PGM_WITH_PAGING(PGM_GST_TYPE)
2675# if PGM_GST_TYPE == PGM_TYPE_32BIT
2676 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2677 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2678# elif PGM_GST_TYPE == PGM_TYPE_PAE
2679 unsigned iPDSrc;
2680 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2681
 2682 if (!pPDSrc)
2683 {
2684 Log(("PGMVerifyAccess: access violation for %VGv due to non-present PDPTR\n", GCPtrPage));
2685 return VINF_EM_RAW_GUEST_TRAP;
2686 }
2687# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2688 unsigned iPDSrc;
2689 PX86PML4E pPml4e;
2690 X86PDPE Pdpe;
2691 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4e, &Pdpe, &iPDSrc);
2692 if (!pPDSrc)
2693 {
2694 Log(("PGMVerifyAccess: access violation for %VGv due to non-present PDPTR\n", GCPtrPage));
2695 return VINF_EM_RAW_GUEST_TRAP;
2696 }
2697# endif
2698# else
2699 PGSTPD pPDSrc = NULL;
2700 const unsigned iPDSrc = 0;
2701# endif
2702 int rc = VINF_SUCCESS;
2703
2704 /*
2705 * First check if the shadow pd is present.
2706 */
2707# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2708 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2709# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2710 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2711# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2712 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2713 PX86PDPAE pPDDst;
2714 PX86PDEPAE pPdeDst;
2715
2716 rc = PGMShwGetAllocLongModePDPtr(pVM, GCPtrPage, &pPDDst);
2717 if (rc != VINF_SUCCESS)
2718 {
2719 AssertMsg(rc == VINF_PGM_SYNC_CR3, ("Unexpected rc=%Vrc\n", rc));
2720 return rc;
2721 }
2722 Assert(pPDDst);
2723 pPdeDst = &pPDDst->a[iPDDst];
2724# endif
2725 if (!pPdeDst->n.u1Present)
2726 {
2727 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2728 AssertRC(rc);
2729 if (rc != VINF_SUCCESS)
2730 return rc;
2731 }
2732
2733# if PGM_WITH_PAGING(PGM_GST_TYPE)
2734 /* Check for dirty bit fault */
2735 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2736 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2737 Log(("PGMVerifyAccess: success (dirty)\n"));
2738 else
2739 {
2740 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2741#else
2742 {
2743 GSTPDE PdeSrc;
2744 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2745 PdeSrc.n.u1Present = 1;
2746 PdeSrc.n.u1Write = 1;
2747 PdeSrc.n.u1Accessed = 1;
2748 PdeSrc.n.u1User = 1;
2749
2750#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2751 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2752 if (uErr & X86_TRAP_PF_US)
2753 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2754 else /* supervisor */
2755 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2756
2757 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2758 if (VBOX_SUCCESS(rc))
2759 {
2760 /* Page was successfully synced */
2761 Log2(("PGMVerifyAccess: success (sync)\n"));
2762 rc = VINF_SUCCESS;
2763 }
2764 else
2765 {
2766 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2767 return VINF_EM_RAW_GUEST_TRAP;
2768 }
2769 }
2770 return rc;
2771
2772#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2773
 2774 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2775 return VERR_INTERNAL_ERROR;
2776#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
2777}
2778
2779
2780#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2781# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
2782/**
2783 * Figures out which kind of shadow page this guest PDE warrants.
2784 *
2785 * @returns Shadow page kind.
2786 * @param pPdeSrc The guest PDE in question.
2787 * @param cr4 The current guest cr4 value.
2788 */
2789DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
2790{
2791 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2792 return BTH_PGMPOOLKIND_PT_FOR_PT;
2793 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2794 //{
2795 // case 0:
2796 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2797 // case X86_PDE4M_RW:
2798 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2799 // case X86_PDE4M_US:
2800 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2801 // case X86_PDE4M_RW | X86_PDE4M_US:
2802 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2803# if 0
2804 // case X86_PDE4M_PAE_NX:
2805 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2806 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2807 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2808 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2809 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2810 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2811 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2812# endif
2813 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2814 //}
2815}
2816# endif
2817#endif
2818
2819#undef MY_STAM_COUNTER_INC
2820#define MY_STAM_COUNTER_INC(a) do { } while (0)
2821
2822
2823/**
2824 * Syncs the paging hierarchy starting at CR3.
2825 *
2826 * @returns VBox status code, no specials.
2827 * @param pVM The virtual machine.
2828 * @param cr0 Guest context CR0 register
2829 * @param cr3 Guest context CR3 register
2830 * @param cr4 Guest context CR4 register
2831 * @param fGlobal Including global page directories or not
2832 */
2833PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
2834{
2835#if PGM_SHW_TYPE == PGM_TYPE_NESTED
2836 /** @todo check if this is really necessary */
2837 HWACCMFlushTLB(pVM);
2838 return VINF_SUCCESS;
2839
2840#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
2841 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
2842 fGlobal = true; /* Change this CR3 reload to be a global one. */
2843
2844 /*
2845 * Update page access handlers.
 2846 * The virtual handlers are always flushed, while the physical ones are flushed only on demand.
 2847 * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
 2848 * have to look into that later because it will have a bad influence on performance.
2849 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
2850 * bird: Yes, but that won't work for aliases.
2851 */
2852 /** @todo this MUST go away. See #1557. */
2853 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2854 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
2855 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
2856
2857# ifdef PGMPOOL_WITH_MONITORING
2858 /*
2859 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
 2860 * Occasionally we will have to clear all the shadow page tables because we wanted
 2861 * to monitor a page which was mapped by too many shadowed page tables. This operation
 2862 * is sometimes referred to as a 'lightweight flush'.
2863 */
2864 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2865 pgmPoolMonitorModifiedClearAll(pVM);
2866 else
2867 {
2868# ifdef IN_RING3
2869 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2870 pgmPoolClearAll(pVM);
2871# else
2872 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2873 return VINF_PGM_SYNC_CR3;
2874# endif
2875 }
2876# endif
2877
2878 Assert(fGlobal || (cr4 & X86_CR4_PGE));
2879 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
2880
2881# if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2882 /*
2883 * Get page directory addresses.
2884 */
2885# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2886 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
2887# else /* PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64*/
2888# if PGM_GST_TYPE == PGM_TYPE_32BIT
2889 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
2890# endif
2891# endif
2892
2893# if PGM_GST_TYPE == PGM_TYPE_32BIT
2894 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2895 Assert(pPDSrc);
2896# ifndef IN_GC
2897 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
2898# endif
2899# endif
2900
2901 /*
2902 * Iterate the page directory.
2903 */
2904 PPGMMAPPING pMapping;
2905 unsigned iPdNoMapping;
2906 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
2907 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2908
2909 /* Only check mappings if they are supposed to be put into the shadow page table. */
2910 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
2911 {
2912 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
2913 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
2914 }
2915 else
2916 {
2917 pMapping = 0;
2918 iPdNoMapping = ~0U;
2919 }
2920# if PGM_GST_TYPE == PGM_TYPE_AMD64
2921 for (uint64_t iPML4E = 0; iPML4E < X86_PG_PAE_ENTRIES; iPML4E++)
2922 {
2923# else
2924 {
2925# endif
2926# if PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2927 for (uint64_t iPDPTE = 0; iPDPTE < GST_PDPE_ENTRIES; iPDPTE++)
2928 {
2929 unsigned iPDSrc;
2930# if PGM_GST_TYPE == PGM_TYPE_PAE
2931 PX86PDPAE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2932 PX86PDEPAE pPDEDst = &pPDPAE->a[iPDPTE * X86_PG_PAE_ENTRIES];
2933 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPDPTE << X86_PDPT_SHIFT, &iPDSrc);
2934# else
2935 PX86PML4E pPml4eSrc;
2936 X86PDPE PdpeSrc;
2937 PX86PDPT pPdptDst;
2938 PX86PDPAE pPDDst;
2939 PX86PDEPAE pPDEDst;
 2940 RTGCUINTPTR GCPtr = (iPML4E << X86_PML4_SHIFT) | (iPDPTE << X86_PDPT_SHIFT);
2941 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2942
2943 int rc = PGMShwGetLongModePDPtr(pVM, GCPtr, &pPdptDst, &pPDDst);
2944 if (rc != VINF_SUCCESS)
2945 {
2946 AssertMsg(rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc=%Vrc\n", rc));
2947 return rc;
2948 }
2949 Assert(pPDDst);
2950 pPDEDst = &pPDDst->a[0];
2951
2952 if (!pPml4eSrc->n.u1Present)
2953 {
2954 /* Guest PML4 not present (anymore). */
2955 if (pVM->pgm.s.CTXMID(p,PaePML4)->a[iPML4E].n.u1Present)
2956 {
2957 /* Shadow PML4 present, so free all pdpt & pd entries. */
2958 for (iPDPTE = 0; iPDPTE < ELEMENTS(pPdptDst->a); iPDPTE++)
2959 {
2960 if (pPdptDst->a[iPDPTE].n.u1Present)
2961 {
 2962 GCPtr = (iPML4E << X86_PML4_SHIFT) | (iPDPTE << X86_PDPT_SHIFT);
2963
2964 rc = PGMShwGetLongModePDPtr(pVM, GCPtr, &pPdptDst, &pPDDst);
2965 if (rc != VINF_SUCCESS)
2966 {
2967 AssertMsg(rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc=%Vrc\n", rc));
2968 return rc;
2969 }
2970
2971 for (unsigned iPD = 0; iPD < ELEMENTS(pPDDst->a); iPD++)
2972 {
2973 if ( pPDDst->a[iPD].n.u1Present
2974 && !(pPDDst->a[iPD].u & PGM_PDFLAGS_MAPPING))
2975 {
2976 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDDst->a[iPD].u & SHW_PDE_PG_MASK), PGMPOOL_IDX_PAE_PD, (iPML4E * X86_PG_PAE_ENTRIES + iPDPTE) * X86_PG_PAE_ENTRIES + iPD);
2977 pPDDst->a[iPD].u = 0;
2978 }
2979 }
2980
2981 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPdptDst->a[iPDPTE].u & SHW_PDE_PG_MASK), PGMPOOL_IDX_PDPT, iPDPTE);
2982 pPdptDst->a[iPDPTE].u = 0;
2983 }
2984 }
2985 }
2986 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pVM->pgm.s.CTXMID(p,PaePML4)->a[iPML4E].u & SHW_PDE_PG_MASK), PGMPOOL_IDX_PML4, iPML4E);
2987 pVM->pgm.s.CTXMID(p,PaePML4)->a[iPML4E].n.u1Present = 0;
2988 break;
2989 }
2990# endif
2991 Assert(iPDSrc == 0);
2992
2993 if (pPDSrc == NULL)
2994 {
2995 /* PDPE not present */
2996 if (pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present)
2997 {
2998 /* for each page directory entry */
2999 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
3000 {
3001 if ( pPDEDst[iPD].n.u1Present
3002 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
3003 {
3004# if PGM_GST_TYPE == PGM_TYPE_AMD64
3005 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst[iPD].u & SHW_PDE_PG_MASK), PGMPOOL_IDX_PAE_PD, (iPML4E * X86_PG_PAE_ENTRIES + iPDPTE) * X86_PG_PAE_ENTRIES + iPD);
3006# else
3007 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst[iPD].u & SHW_PDE_PG_MASK), PGMPOOL_IDX_PAE_PD, iPDPTE * X86_PG_PAE_ENTRIES + iPD);
3008# endif
3009 pPDEDst[iPD].u = 0;
3010 }
3011 }
3012 }
3013 if (!(pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].u & PGM_PLXFLAGS_MAPPING))
3014 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPDPTE].n.u1Present = 0;
3015 continue;
3016 }
3017# else /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
3018 {
3019# endif /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
3020 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
3021 {
3022# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3023 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
3024# elif PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3025 AssertMsg(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst, ("%p vs %p\n", &pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512], pPDEDst));
3026# endif
3027 register GSTPDE PdeSrc = pPDSrc->a[iPD];
3028 if ( PdeSrc.n.u1Present
3029 && (PdeSrc.n.u1User || fRawR0Enabled))
3030 {
3031# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3032 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3033 && !defined(PGM_WITHOUT_MAPPINGS)
3034
3035 /*
3036 * Check for conflicts with GC mappings.
3037 */
3038# if PGM_GST_TYPE == PGM_TYPE_PAE
3039 if (iPD + iPDPTE * X86_PG_PAE_ENTRIES == iPdNoMapping)
3040# else
3041 if (iPD == iPdNoMapping)
3042# endif
3043 {
3044 if (pVM->pgm.s.fMappingsFixed)
3045 {
3046 /* It's fixed, just skip the mapping. */
3047 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3048 iPD += cPTs - 1;
3049 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
3050 pMapping = pMapping->CTXALLSUFF(pNext);
3051 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3052 continue;
3053 }
3054# ifdef IN_RING3
3055# if PGM_GST_TYPE == PGM_TYPE_32BIT
3056 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3057# elif PGM_GST_TYPE == PGM_TYPE_PAE
3058 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPDPTE << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3059# endif
3060 if (VBOX_FAILURE(rc))
3061 return rc;
3062
3063 /*
3064 * Update iPdNoMapping and pMapping.
3065 */
3066 pMapping = pVM->pgm.s.pMappingsR3;
3067 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3068 pMapping = pMapping->pNextR3;
3069 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3070# else
3071 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3072 return VINF_PGM_SYNC_CR3;
3073# endif
3074 }
3075# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3076 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3077# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3078 /*
3079 * Sync page directory entry.
3080 *
3081 * The current approach is to allocated the page table but to set
3082 * the entry to not-present and postpone the page table synching till
3083 * it's actually used.
3084 */
3085# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3086 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3087# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3088 const unsigned iPdShw = iPD + iPDPTE * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3089# else
3090 const unsigned iPdShw = iPD; NOREF(iPdShw);
3091# endif
3092 {
3093 SHWPDE PdeDst = *pPDEDst;
3094 if (PdeDst.n.u1Present)
3095 {
3096 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
3097 RTGCPHYS GCPhys;
3098 if ( !PdeSrc.b.u1Size
3099 || !(cr4 & X86_CR4_PSE))
3100 {
3101 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
3102# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3103 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3104 GCPhys |= i * (PAGE_SIZE / 2);
3105# endif
3106 }
3107 else
3108 {
3109 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
3110# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3111                                 /* Select the right PDE as we're emulating a 4MB guest page with two 2MB shadow PDEs. */
3112 GCPhys |= i * X86_PAGE_2M_SIZE;
3113# endif
3114 }
3115
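                            /* Decide whether the existing shadow page table can be kept: it must
                             * map the same guest physical range, be of the kind required by the
                             * current PSE setting, be cached (or be skippable on a non-global
                             * flush), and its US/RW (or dirty-tracking) state must still match
                             * the guest PDE. Otherwise it is freed and the shadow PDE is left
                             * not-present so it gets resynced lazily. */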
3116 if ( pShwPage->GCPhys == GCPhys
3117 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
3118 && ( pShwPage->fCached
3119 || ( !fGlobal
3120 && ( false
3121# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
3122 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3123 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
3124 || ( !pShwPage->fSeenNonGlobal
3125 && (cr4 & X86_CR4_PGE))
3126# endif
3127 )
3128 )
3129 )
3130 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
3131 || ( (cr4 & X86_CR4_PSE)
3132 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
3133 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
3134 )
3135 )
3136 {
3137# ifdef VBOX_WITH_STATISTICS
3138 if ( !fGlobal
3139 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3140 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
3141 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
3142 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
3143 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
3144 else
3145 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
3146# endif /* VBOX_WITH_STATISTICS */
3147 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
3148 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
3149 //# ifdef PGMPOOL_WITH_CACHE
3150 // pgmPoolCacheUsed(pPool, pShwPage);
3151 //# endif
3152 }
3153 else
3154 {
3155 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3156 pPDEDst->u = 0;
3157 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
3158 }
3159 }
3160 else
3161 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
3162 pPDEDst++;
3163 }
3164 }
3165# if PGM_GST_TYPE == PGM_TYPE_PAE
3166 else if (iPD + iPDPTE * X86_PG_PAE_ENTRIES != iPdNoMapping)
3167# else
3168 else if (iPD != iPdNoMapping)
3169# endif
3170 {
3171 /*
3172 * Check if there is any page directory to mark not present here.
3173 */
3174# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3175 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3176# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3177 const unsigned iPdShw = iPD + iPDPTE * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3178# else
3179 const unsigned iPdShw = iPD; NOREF(iPdShw);
3180# endif
3181 {
3182 if (pPDEDst->n.u1Present)
3183 {
3184 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
3185 pPDEDst->u = 0;
3186 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
3187 }
3188 pPDEDst++;
3189 }
3190 }
3191 else
3192 {
3193# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3194 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3195 && !defined(PGM_WITHOUT_MAPPINGS)
3196
3197 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3198
3199 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3200 if (pVM->pgm.s.fMappingsFixed)
3201 {
3202 /* It's fixed, just skip the mapping. */
3203 pMapping = pMapping->CTXALLSUFF(pNext);
3204 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3205 }
3206 else
3207 {
3208 /*
3209 * Check for conflicts for subsequent pagetables
3210 * and advance to the next mapping.
3211 */
3212 iPdNoMapping = ~0U;
3213 unsigned iPT = cPTs;
3214 while (iPT-- > 1)
3215 {
3216 if ( pPDSrc->a[iPD + iPT].n.u1Present
3217 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3218 {
3219# ifdef IN_RING3
3220# if PGM_GST_TYPE == PGM_TYPE_32BIT
3221 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3222# elif PGM_GST_TYPE == PGM_TYPE_PAE
3223 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPDPTE << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3224# endif
3225 if (VBOX_FAILURE(rc))
3226 return rc;
3227
3228 /*
3229 * Update iPdNoMapping and pMapping.
3230 */
3231 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
3232 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3233 pMapping = pMapping->CTXALLSUFF(pNext);
3234 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3235 break;
3236# else
3237 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3238 return VINF_PGM_SYNC_CR3;
3239# endif
3240 }
3241 }
3242 if (iPdNoMapping == ~0U && pMapping)
3243 {
3244 pMapping = pMapping->CTXALLSUFF(pNext);
3245 if (pMapping)
3246 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3247 }
3248 }
3249
3250 /* advance. */
3251 iPD += cPTs - 1;
3252 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
3253# if PGM_GST_TYPE != PGM_SHW_TYPE
3254 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3255# endif
3256# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3257 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3258# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3259 }
3260
3261 } /* for iPD */
3262 } /* for each PDPTE (PAE) */
3263 } /* for each page map level 4 entry (amd64) */
3264 return VINF_SUCCESS;
3265
3266# else /* guest real and protected mode */
3267 return VINF_SUCCESS;
3268# endif
3269#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
3270}
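
/*
 * Illustrative sketch (disabled; not part of the build): the per-PDE decision the
 * SyncCR3 loop above makes, reduced to its essence. A still-valid cached shadow
 * page table is kept; anything stale is freed and the shadow PDE left not-present
 * so that SyncPT repopulates it on first access. All names below are hypothetical
 * stand-ins, not PGM APIs.
 */
#if 0
typedef struct SKETCHPDE { uint64_t u; } SKETCHPDE;

static void sketchSyncShadowPde(SKETCHPDE *pShwPde, bool fGstPresent, bool fStillValid)
{
    if (!fGstPresent)
    {
        /* The guest dropped the mapping; the shadow copy is stale. */
        pShwPde->u = 0;         /* (the real code also returns the page to the pool) */
        return;
    }
    if (fStillValid && pShwPde->u)
        return;                 /* same GCPhys, right kind, cached: keep it as-is */
    pShwPde->u = 0;             /* freed; left not-present so SyncPT fills it in lazily */
}
#endif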
3271
3272
3273
3274
3275#ifdef VBOX_STRICT
3276#ifdef IN_GC
3277# undef AssertMsgFailed
3278# define AssertMsgFailed Log
3279#endif
3280#ifdef IN_RING3
3281# include <VBox/dbgf.h>
3282
3283/**
3284 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3285 *
3286 * @returns VBox status code (VINF_SUCCESS).
3287 * @param pVM The VM handle.
3288 * @param cr3 The root of the hierarchy.
3289 * @param cr4        The CR4 register; only the PAE and PSE flags are currently used.
3290 * @param fLongMode Set if long mode, false if not long mode.
3291 * @param cMaxDepth Number of levels to dump.
3292 * @param pHlp Pointer to the output functions.
3293 */
3294__BEGIN_DECLS
3295PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3296__END_DECLS
3297
3298#endif
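
/*
 * Usage sketch (disabled): one way the ring-3 dumper declared above might be
 * invoked from debug code, assuming the caller already holds a VM handle and a
 * DBGF info helper; the depth of 4 and the non-long-mode flag are arbitrary
 * example values, not a prescribed pattern.
 */
#if 0
static void sketchDumpGuestHierarchy(PVM pVM, uint32_t cr3, uint32_t cr4, PCDBGFINFOHLP pHlp)
{
    /* Dump up to four paging levels starting at cr3; PAE/PSE are taken from cr4. */
    PGMR3DumpHierarchyHC(pVM, cr3, cr4, false /*fLongMode*/, 4 /*cMaxDepth*/, pHlp);
}
#endif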
3299
3300/**
3301 * Checks that the shadow page table is in sync with the guest one.
3302 *
3303 * @returns The number of errors.
3304 * @param pVM The virtual machine.
3305 * @param cr3 Guest context CR3 register
3306 * @param cr4 Guest context CR4 register
3307 * @param GCPtr Where to start. Defaults to 0.
3308 * @param cb How much to check. Defaults to everything.
3309 */
3310PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
3311{
3312#if PGM_SHW_TYPE == PGM_TYPE_NESTED
3313 return 0;
3314#else
3315 unsigned cErrors = 0;
3316
3317#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3318 || PGM_GST_TYPE == PGM_TYPE_PAE
3319
3320 PPGM pPGM = &pVM->pgm.s;
3321 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3322 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3323# ifndef IN_RING0
3324 RTHCPHYS HCPhys; /* general usage. */
3325# endif
3326 int rc;
3327
3328 /*
3329 * Check that the Guest CR3 and all its mappings are correct.
3330 */
3331 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3332 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3333 false);
3334# ifndef IN_RING0
3335# if PGM_GST_TYPE == PGM_TYPE_32BIT
3336 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGuestPDGC, NULL, &HCPhysShw);
3337# else
3338 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePDPTGC, NULL, &HCPhysShw);
3339# endif
3340 AssertRCReturn(rc, 1);
3341 HCPhys = NIL_RTHCPHYS;
3342 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3343     AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhysShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
3344# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3345 RTGCPHYS GCPhys;
3346 rc = PGMR3DbgHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
3347 AssertRCReturn(rc, 1);
3348 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
3349# endif
3350#endif /* !IN_RING0 */
3351
3352# if PGM_GST_TYPE == PGM_TYPE_32BIT
3353 const GSTPD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
3354# endif
3355
3356 /*
3357 * Get and check the Shadow CR3.
3358 */
3359# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3360 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
3361 unsigned cPDEs = ELEMENTS(pPDDst->a);
3362# else
3363 const X86PDPAE *pPDDst = pPGM->CTXMID(ap,PaePDs[0]); /* use it as a 2048 entry PD */
3364 unsigned cPDEs = ELEMENTS(pPDDst->a) * ELEMENTS(pPGM->apHCPaePDs);
3365# endif
3366 if (cb != ~(RTGCUINTPTR)0)
3367 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3368
3369/** @todo call the other two PGMAssert*() functions. */
3370
3371# if PGM_GST_TYPE == PGM_TYPE_PAE
3372 /*
3373 * Check the 4 PDPTs too.
3374 */
3375 for (unsigned i = 0; i < 4; i++)
3376 {
3377 RTHCPTR HCPtr;
3378 RTHCPHYS HCPhys;
3379 RTGCPHYS GCPhys = pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[i].u & X86_PDPE_PG_MASK;
3380 int rc2 = pgmRamGCPhys2HCPtrAndHCPhysWithFlags(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
3381 if (VBOX_SUCCESS(rc2))
3382 {
3383 AssertMsg( pVM->pgm.s.apGstPaePDsHC[i] == (R3R0PTRTYPE(PX86PDPAE))HCPtr
3384 && pVM->pgm.s.aGCPhysGstPaePDs[i] == GCPhys,
3385 ("idx %d apGstPaePDsHC %VHv vs %VHv aGCPhysGstPaePDs %VGp vs %VGp\n",
3386 i, pVM->pgm.s.apGstPaePDsHC[i], HCPtr, pVM->pgm.s.aGCPhysGstPaePDs[i], GCPhys));
3387 }
3388 }
3389# endif
3390
3391 /*
3392 * Iterate the shadow page directory.
3393 */
3394 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3395 unsigned iPDDst = GCPtr >> SHW_PD_SHIFT;
3396 cPDEs += iPDDst;
3397 for (;
3398 iPDDst < cPDEs;
3399 iPDDst++, GCPtr += _4G / cPDEs)
3400 {
3401# if PGM_GST_TYPE == PGM_TYPE_PAE
3402 uint32_t iPDSrc;
3403 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pPGM, (RTGCUINTPTR)GCPtr, &iPDSrc);
3404 if (!pPDSrc)
3405 {
3406             AssertMsg(!pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtr >> GST_PDPT_SHIFT) & GST_PDPT_MASK].n.u1Present, ("Guest PDPTE not present, guest PDPTE=%VX64\n", pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtr >> GST_PDPT_SHIFT) & GST_PDPT_MASK].u));
3407 continue;
3408 }
3409#endif
3410
3411 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3412 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3413 {
3414 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3415 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3416 {
3417 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3418 cErrors++;
3419 continue;
3420 }
3421 }
3422 else if ( (PdeDst.u & X86_PDE_P)
3423 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3424 )
3425 {
3426 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3427 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3428 if (!pPoolPage)
3429 {
3430 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
3431 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3432 cErrors++;
3433 continue;
3434 }
3435 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3436
3437 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3438 {
3439 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
3440 GCPtr, (uint64_t)PdeDst.u));
3441 cErrors++;
3442 }
3443
3444 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3445 {
3446 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
3447 GCPtr, (uint64_t)PdeDst.u));
3448 cErrors++;
3449 }
3450
3451 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3452 if (!PdeSrc.n.u1Present)
3453 {
3454 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3455 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3456 cErrors++;
3457 continue;
3458 }
3459
3460 if ( !PdeSrc.b.u1Size
3461 || !(cr4 & X86_CR4_PSE))
3462 {
3463 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3464# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3465 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3466# endif
3467 }
3468 else
3469 {
3470# if PGM_GST_TYPE == PGM_TYPE_32BIT
3471 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3472 {
3473 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3474 GCPtr, (uint64_t)PdeSrc.u));
3475 cErrors++;
3476 continue;
3477 }
3478# endif
3479 GCPhysGst = PdeSrc.u & GST_PDE_BIG_PG_MASK;
3480# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3481 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3482# endif
3483 }
3484
3485 if ( pPoolPage->enmKind
3486 != (!PdeSrc.b.u1Size || !(cr4 & X86_CR4_PSE) ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3487 {
3488 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3489 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3490 cErrors++;
3491 }
3492
3493 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3494 if (!pPhysPage)
3495 {
3496 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3497 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3498 cErrors++;
3499 continue;
3500 }
3501
3502 if (GCPhysGst != pPoolPage->GCPhys)
3503 {
3504 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3505 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3506 cErrors++;
3507 continue;
3508 }
3509
3510 if ( !PdeSrc.b.u1Size
3511 || !(cr4 & X86_CR4_PSE))
3512 {
3513 /*
3514 * Page Table.
3515 */
3516 const GSTPT *pPTSrc;
3517 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3518 if (VBOX_FAILURE(rc))
3519 {
3520 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3521 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3522 cErrors++;
3523 continue;
3524 }
3525 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3526 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3527 {
3528 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3529 // (This problem will go away when/if we shadow multiple CR3s.)
3530 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3531 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3532 cErrors++;
3533 continue;
3534 }
3535 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3536 {
3537 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3538 GCPtr, (uint64_t)PdeDst.u));
3539 cErrors++;
3540 continue;
3541 }
3542
3543 /* iterate the page table. */
3544# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3545 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3546 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3547# else
3548 const unsigned offPTSrc = 0;
3549# endif
3550 for (unsigned iPT = 0, off = 0;
3551 iPT < ELEMENTS(pPTDst->a);
3552 iPT++, off += PAGE_SIZE)
3553 {
3554 const SHWPTE PteDst = pPTDst->a[iPT];
3555
3556 /* skip not-present entries. */
3557 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3558 continue;
3559 Assert(PteDst.n.u1Present);
3560
3561 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3562 if (!PteSrc.n.u1Present)
3563 {
3564#ifdef IN_RING3
3565 PGMAssertHandlerAndFlagsInSync(pVM);
3566 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3567#endif
3568 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3569 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3570 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3571 cErrors++;
3572 continue;
3573 }
3574
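                                /* Bits allowed to differ between guest and shadow PTEs: the page
                                 * address (the shadow points at host pages), the AVL bits, and bits
                                 * that are either not virtualized or synced lazily (G, D, PWT, PCD,
                                 * PAT, and A just below). Everything else must match. */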
3575 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3576# if 1 /** @todo sync accessed bit properly... */
3577 fIgnoreFlags |= X86_PTE_A;
3578# endif
3579
3580 /* match the physical addresses */
3581 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3582 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3583
3584# ifdef IN_RING3
3585 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3586 if (VBOX_FAILURE(rc))
3587 {
3588 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3589 {
3590 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3591 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3592 cErrors++;
3593 continue;
3594 }
3595 }
3596 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3597 {
3598 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3599 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3600 cErrors++;
3601 continue;
3602 }
3603# endif
3604
3605 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3606 if (!pPhysPage)
3607 {
3608# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3609 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3610 {
3611 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3612 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3613 cErrors++;
3614 continue;
3615 }
3616# endif
3617 if (PteDst.n.u1Write)
3618 {
3619 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3620 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3621 cErrors++;
3622 }
3623 fIgnoreFlags |= X86_PTE_RW;
3624 }
3625 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
3626 {
3627 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3628 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3629 cErrors++;
3630 continue;
3631 }
3632
3633 /* flags */
3634 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3635 {
3636 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3637 {
3638 if (PteDst.n.u1Write)
3639 {
3640                                 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3641 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3642 cErrors++;
3643 continue;
3644 }
3645 fIgnoreFlags |= X86_PTE_RW;
3646 }
3647 else
3648 {
3649 if (PteDst.n.u1Present)
3650 {
3651 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3652 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3653 cErrors++;
3654 continue;
3655 }
3656 fIgnoreFlags |= X86_PTE_P;
3657 }
3658 }
3659 else
3660 {
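                                /* Dirty-bit emulation: a guest page that is writable but not yet
                                 * dirty must be shadowed read-only with PGM_PTFLAGS_TRACK_DIRTY set,
                                 * so the first write traps and the guest D bit can be set then. */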
3661 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3662 {
3663 if (PteDst.n.u1Write)
3664 {
3665 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3666 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3667 cErrors++;
3668 continue;
3669 }
3670 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3671 {
3672 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3673 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3674 cErrors++;
3675 continue;
3676 }
3677 if (PteDst.n.u1Dirty)
3678 {
3679 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3680 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3681 cErrors++;
3682 }
3683# if 0 /** @todo sync access bit properly... */
3684 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3685 {
3686                                     AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3687 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3688 cErrors++;
3689 }
3690 fIgnoreFlags |= X86_PTE_RW;
3691# else
3692 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3693# endif
3694 }
3695 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3696 {
3697 /* access bit emulation (not implemented). */
3698 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3699 {
3700 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3701 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3702 cErrors++;
3703 continue;
3704 }
3705 if (!PteDst.n.u1Accessed)
3706 {
3707                                     AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3708 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3709 cErrors++;
3710 }
3711 fIgnoreFlags |= X86_PTE_P;
3712 }
3713# ifdef DEBUG_sandervl
3714 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3715# endif
3716 }
3717
3718 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3719 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3720 )
3721 {
3722 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3723 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3724 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3725 cErrors++;
3726 continue;
3727 }
3728 } /* foreach PTE */
3729 }
3730 else
3731 {
3732 /*
3733 * Big Page.
3734 */
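                    /* A 2/4MB guest page is shadowed with an ordinary page table of 4KB
                     * PTEs, so each shadow PTE below is checked against the single guest
                     * PDE, with GCPhysGst advancing one page per entry. */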
3735 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3736 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3737 {
3738 if (PdeDst.n.u1Write)
3739 {
3740 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3741 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3742 cErrors++;
3743 continue;
3744 }
3745 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3746 {
3747                             AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3748 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3749 cErrors++;
3750 continue;
3751 }
3752# if 0 /** @todo sync access bit properly... */
3753 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3754 {
3755                             AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3756 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3757 cErrors++;
3758 }
3759 fIgnoreFlags |= X86_PTE_RW;
3760# else
3761 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3762# endif
3763 }
3764 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3765 {
3766 /* access bit emulation (not implemented). */
3767 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3768 {
3769 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3770 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3771 cErrors++;
3772 continue;
3773 }
3774 if (!PdeDst.n.u1Accessed)
3775 {
3776                             AssertMsgFailed(("!ACCESSED page at %VGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3777 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3778 cErrors++;
3779 }
3780 fIgnoreFlags |= X86_PTE_P;
3781 }
3782
3783 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3784 {
3785 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3786 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3787 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3788 cErrors++;
3789 }
3790
3791 /* iterate the page table. */
3792 for (unsigned iPT = 0, off = 0;
3793 iPT < ELEMENTS(pPTDst->a);
3794 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3795 {
3796 const SHWPTE PteDst = pPTDst->a[iPT];
3797
3798 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3799 {
3800 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3801 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3802 cErrors++;
3803 }
3804
3805 /* skip not-present entries. */
3806 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3807 continue;
3808
3809 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3810
3811 /* match the physical addresses */
3812 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3813
3814# ifdef IN_RING3
3815 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3816 if (VBOX_FAILURE(rc))
3817 {
3818 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3819 {
3820 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3821 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3822 cErrors++;
3823 }
3824 }
3825 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3826 {
3827 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3828 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3829 cErrors++;
3830 continue;
3831 }
3832# endif
3833
3834 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3835 if (!pPhysPage)
3836 {
3837# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3838 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3839 {
3840 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3841 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3842 cErrors++;
3843 continue;
3844 }
3845# endif
3846 if (PteDst.n.u1Write)
3847 {
3848 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3849 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3850 cErrors++;
3851 }
3852 fIgnoreFlags |= X86_PTE_RW;
3853 }
3854 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
3855 {
3856 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3857 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3858 cErrors++;
3859 continue;
3860 }
3861
3862 /* flags */
3863 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3864 {
3865 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3866 {
3867 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
3868 {
3869 if (PteDst.n.u1Write)
3870 {
3871                                         AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3872 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3873 cErrors++;
3874 continue;
3875 }
3876 fIgnoreFlags |= X86_PTE_RW;
3877 }
3878 }
3879 else
3880 {
3881 if (PteDst.n.u1Present)
3882 {
3883                                         AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
3884 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3885 cErrors++;
3886 continue;
3887 }
3888 fIgnoreFlags |= X86_PTE_P;
3889 }
3890 }
3891
3892 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3893 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
3894 )
3895 {
3896 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
3897 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3898 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3899 cErrors++;
3900 continue;
3901 }
3902 } /* foreach PTE */
3903 }
3904 }
3905 /* not present */
3906
3907     } /* foreach PDE */
3908
3909# ifdef DEBUG
3910 if (cErrors)
3911 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
3912# endif
3913
3914#elif PGM_GST_TYPE == PGM_TYPE_PAE
3915//# error not implemented
3916
3917
3918#elif PGM_GST_TYPE == PGM_TYPE_AMD64
3919//# error not implemented
3920
3921/*#else: guest real and protected mode */
3922#endif
3923 return cErrors;
3924
3925#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
3926}
3927#endif /* VBOX_STRICT */
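
/*
 * Illustrative sketch (disabled; not part of the build): the flag comparison the
 * checker above applies to each entry, expressed as a predicate. An entry is in
 * sync when guest and shadow match once the ignored/lazily-synced bits are masked
 * out, or when the only remaining difference is that the shadow lacks write access
 * (which may be granted lazily, e.g. for dirty-bit tracking or handler removal).
 * The function name is a hypothetical stand-in.
 */
#if 0
static bool sketchEntryInSync(uint64_t uGst, uint64_t uShw, uint64_t fIgnore)
{
    if ((uGst & ~fIgnore) == (uShw & ~fIgnore))
        return true;
    /* Tolerate a read-only shadow entry shadowing a writable guest entry. */
    return (uGst & ~(fIgnore | X86_PTE_RW)) == (uShw & ~fIgnore);
}
#endif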
3928