VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 10324

Last change on this file since 10324 was 10323, checked in by vboxsync, 17 years ago

Clear the shadow page table entries whenever the guest modifies its page tables. This prevents us
from checking all levels in SyncCR3. (amd64 shadow case only)
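
The idea behind this change, as a minimal sketch only (not actual VirtualBox code; the SHWPTSKETCH type and the sketchClearShadowPteOnGuestWrite helper are invented for illustration): the write monitor on a guest page table clears the matching shadow page table entry as soon as the guest writes to it, so the next access through that entry simply refaults and resyncs it, and SyncCR3 no longer has to walk every paging level looking for stale entries.

    #include <stdint.h>

    /* Hypothetical stand-ins for the real shadow page table structures. */
    typedef struct { uint64_t u; } SHWPTESKETCH;
    typedef struct { SHWPTESKETCH a[512]; } SHWPTSKETCH;

    /* Conceptually invoked from the write-monitor handler when the guest
       modifies entry iPte of a monitored guest page table. */
    static void sketchClearShadowPteOnGuestWrite(SHWPTSKETCH *pShwPT, unsigned iPte)
    {
        pShwPT->a[iPte].u = 0; /* not present: the next access refaults and resyncs just this entry */
    }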

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 187.2 KB
1/* $Id: PGMAllBth.h 10323 2008-07-07 13:54:05Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr = 0, RTGCUINTPTR cb = ~(RTGCUINTPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42__END_DECLS
43
44
45/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
46#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED
47# error "Invalid combination; PAE guest implies PAE shadow"
48#endif
49
50#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
51 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED)
52# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
53#endif
54
55#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
56 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED)
57# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED) \
61 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
62# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
63#endif
64
65#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
66# define PGM_WITHOUT_MAPPINGS
67#endif
68
69/**
70 * #PF Handler for raw-mode guest execution.
71 *
72 * @returns VBox status code (appropriate for trap handling and GC return).
73 * @param pVM VM Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 */
78PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
79{
80#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
81 && PGM_SHW_TYPE != PGM_TYPE_NESTED
82
83# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
84 /*
85 * Hide the instruction fetch trap indicator for now.
86 */
87 /** @todo NXE will change this and we must fix NXE in the switcher too! */
88 if (uErr & X86_TRAP_PF_ID)
89 {
90 uErr &= ~X86_TRAP_PF_ID;
91 TRPMSetErrorCode(pVM, uErr);
92 }
93# endif
94
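 /* Note on uErr: the page fault error code follows the architectural x86 layout.
  * X86_TRAP_PF_P clear means the page was not present (set means a protection
  * violation), X86_TRAP_PF_RW is set for write accesses, X86_TRAP_PF_US is set
  * when the access originated in user mode, and X86_TRAP_PF_ID flags an
  * instruction fetch (only generated when no-execute is in effect).
  */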
95 /*
96 * Get PDs.
97 */
98 int rc;
99# if PGM_WITH_PAGING(PGM_GST_TYPE)
100# if PGM_GST_TYPE == PGM_TYPE_32BIT
101 const unsigned iPDSrc = (RTGCUINTPTR)pvFault >> GST_PD_SHIFT;
102 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
103
104# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
105
106# if PGM_GST_TYPE == PGM_TYPE_PAE
107 unsigned iPDSrc;
108 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, (RTGCUINTPTR)pvFault, &iPDSrc);
109
110# elif PGM_GST_TYPE == PGM_TYPE_AMD64
111 unsigned iPDSrc;
112 PX86PML4E pPml4eSrc;
113 X86PDPE PdpeSrc;
114 PGSTPD pPDSrc;
115
116 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
117 Assert(pPml4eSrc);
118# endif
119 /* Quick check for a valid guest trap. */
120 if (!pPDSrc)
121 {
122 LogFlow(("Trap0eHandler: guest PDPTR not present CR3=%VGp\n", (CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK)));
123 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eGuestTrap; });
124 TRPMSetErrorCode(pVM, uErr);
125 return VINF_EM_RAW_GUEST_TRAP;
126 }
127# endif
128# else
129 PGSTPD pPDSrc = NULL;
130 const unsigned iPDSrc = 0;
131# endif
132
133# if PGM_SHW_TYPE == PGM_TYPE_32BIT
134 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
135 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
136# elif PGM_SHW_TYPE == PGM_TYPE_PAE
137 const unsigned iPDDst = (RTGCUINTPTR)pvFault >> SHW_PD_SHIFT;
138 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries, so no need to AND with SHW_PD_MASK to get iPDDst */
139
140# if PGM_GST_TYPE == PGM_TYPE_PAE
141 /* Did we mark the PDPT as not present in SyncCR3? */
142 unsigned iPdpte = ((RTGCUINTPTR)pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
143 if (!pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPdpte].n.u1Present)
144 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPdpte].n.u1Present = 1;
145
146# endif
147
148# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
149 const unsigned iPDDst = (((RTGCUINTPTR)pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
150 PX86PDPAE pPDDst;
151# if PGM_GST_TYPE == PGM_TYPE_PROT
152 /* AMD-V nested paging */
153 X86PML4E Pml4eSrc;
154 X86PDPE PdpeSrc;
155 PX86PML4E pPml4eSrc = &Pml4eSrc;
156
157 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
158 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
159 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
160# endif
161
162 rc = PGMShwSyncLongModePDPtr(pVM, (RTGCUINTPTR)pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
163 if (rc != VINF_SUCCESS)
164 {
165 AssertRC(rc);
166 return rc;
167 }
168 Assert(pPDDst);
169# endif
170
171# if PGM_WITH_PAGING(PGM_GST_TYPE)
172 /*
173 * If we successfully correct the write protection fault due to dirty bit
174 * tracking, or this page fault is a genuine one, then return immediately.
175 */
176 STAM_PROFILE_START(&pVM->pgm.s.StatCheckPageFault, e);
177 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], (RTGCUINTPTR)pvFault);
178 STAM_PROFILE_STOP(&pVM->pgm.s.StatCheckPageFault, e);
179 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
180 || rc == VINF_EM_RAW_GUEST_TRAP)
181 {
182 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution)
183 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatTrap0eDirtyAndAccessedBits : &pVM->pgm.s.StatTrap0eGuestTrap; });
184 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
185 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
186 }
187
188 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0ePD[iPDSrc]);
189# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
190
191 /*
192 * A common case is the not-present error caused by lazy page table syncing.
193 *
194 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
195 * so we can safely assume that the shadow PT is present when calling SyncPage later.
196 *
197 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
198 * of mapping conflict and defer to SyncCR3 in R3.
199 * (Again, we do NOT support access handlers for non-present guest pages.)
200 *
201 */
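 /* In short, the lazy sync works like this: on a guest CR3 load we leave the
  * shadow page tables empty, the first access through a not-present shadow PDE
  * faults into this path, SyncPT below rebuilds the shadow page table from the
  * guest one, and the faulting instruction is restarted.
  */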
202# if PGM_WITH_PAGING(PGM_GST_TYPE)
203 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
204# else
205 GSTPDE PdeSrc;
206 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
207 PdeSrc.n.u1Present = 1;
208 PdeSrc.n.u1Write = 1;
209 PdeSrc.n.u1Accessed = 1;
210 PdeSrc.n.u1User = 1;
211# endif
212 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
213 && !pPDDst->a[iPDDst].n.u1Present
214 && PdeSrc.n.u1Present
215 )
216
217 {
218 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eSyncPT; });
219 STAM_PROFILE_START(&pVM->pgm.s.StatLazySyncPT, f);
220 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
221 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, (RTGCUINTPTR)pvFault);
222 if (VBOX_SUCCESS(rc))
223 {
224 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
225 return rc;
226 }
227 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
228 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
229 STAM_PROFILE_STOP(&pVM->pgm.s.StatLazySyncPT, f);
230 return VINF_PGM_SYNC_CR3;
231 }
232
233# if PGM_WITH_PAGING(PGM_GST_TYPE)
234 /*
235 * Check if this address is within any of our mappings.
236 *
237 * This is *very* fast and it's gonna save us a bit of effort below and prevent
238 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
239 * (BTW, it's impossible to have physical access handlers in a mapping.)
240 */
241 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
242 {
243 STAM_PROFILE_START(&pVM->pgm.s.StatMapping, a);
244 PPGMMAPPING pMapping = CTXALLSUFF(pVM->pgm.s.pMappings);
245 for ( ; pMapping; pMapping = CTXALLSUFF(pMapping->pNext))
246 {
247 if ((RTGCUINTPTR)pvFault < (RTGCUINTPTR)pMapping->GCPtr)
248 break;
249 if ((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pMapping->GCPtr < pMapping->cb)
250 {
251 /*
252 * The first thing we check is if we've got an undetected conflict.
253 */
254 if (!pVM->pgm.s.fMappingsFixed)
255 {
256 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
257 while (iPT-- > 0)
258 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
259 {
260 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eConflicts);
261 Log(("Trap0e: Detected Conflict %VGv-%VGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
262 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
263 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
264 return VINF_PGM_SYNC_CR3;
265 }
266 }
267
268 /*
269 * Check if the fault address is in a virtual page access handler range.
270 */
271 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->HyperVirtHandlers, pvFault);
272 if ( pCur
273 && (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
274 && uErr & X86_TRAP_PF_RW)
275 {
276# ifdef IN_GC
277 STAM_PROFILE_START(&pCur->Stat, h);
278 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
279 STAM_PROFILE_STOP(&pCur->Stat, h);
280# else
281 AssertFailed();
282 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
283# endif
284 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eMapHandler);
285 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
286 return rc;
287 }
288
289 /*
290 * Pretend we're not here and let the guest handle the trap.
291 */
292 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
293 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eMap);
294 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
295 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
296 return VINF_EM_RAW_GUEST_TRAP;
297 }
298 }
299 STAM_PROFILE_STOP(&pVM->pgm.s.StatMapping, a);
300 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
301# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
302
303 /*
304 * Check if this fault address is flagged for special treatment,
305 * which means we'll have to figure out the physical address and
306 * check flags associated with it.
307 *
308 * ASSUME that we can limit any special access handling to pages
309 * in page tables which the guest believes to be present.
310 */
311 if (PdeSrc.n.u1Present)
312 {
313 RTGCPHYS GCPhys = NIL_RTGCPHYS;
314
315# if PGM_WITH_PAGING(PGM_GST_TYPE)
316# if PGM_GST_TYPE == PGM_TYPE_AMD64
317 bool fBigPagesSupported = true;
318# else
319 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
320# endif
321 if ( PdeSrc.b.u1Size
322 && fBigPagesSupported)
323 GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK)
324 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
325 else
326 {
327 PGSTPT pPTSrc;
328 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
329 if (VBOX_SUCCESS(rc))
330 {
331 unsigned iPTESrc = ((RTGCUINTPTR)pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
332 if (pPTSrc->a[iPTESrc].n.u1Present)
333 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
334 }
335 }
336# else
337 /* No paging so the fault address is the physical address */
338 GCPhys = (RTGCPHYS)((RTGCUINTPTR)pvFault & ~PAGE_OFFSET_MASK);
339# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
340
341 /*
342 * If we have a GC address we'll check if it has any flags set.
343 */
344 if (GCPhys != NIL_RTGCPHYS)
345 {
346 STAM_PROFILE_START(&pVM->pgm.s.StatHandlers, b);
347
348 PPGMPAGE pPage;
349 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
350 if (VBOX_SUCCESS(rc))
351 {
352 if (PGM_PAGE_HAS_ANY_HANDLERS(pPage))
353 {
354 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
355 {
356 /*
357 * Physical page access handler.
358 */
359 const RTGCPHYS GCPhysFault = GCPhys | ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK);
360 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->PhysHandlers, GCPhysFault);
361 if (pCur)
362 {
363# ifdef PGM_SYNC_N_PAGES
364 /*
365 * If the region is write protected and we got a page not present fault, then sync
366 * the pages. If the fault was caused by a read, then restart the instruction.
367 * In case of write access continue to the GC write handler.
368 *
369 * ASSUMES that there is only one handler per page or that they have similar write properties.
370 */
371 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
372 && !(uErr & X86_TRAP_PF_P))
373 {
374 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
375 if ( VBOX_FAILURE(rc)
376 || !(uErr & X86_TRAP_PF_RW)
377 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
378 {
379 AssertRC(rc);
380 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
381 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
382 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
383 return rc;
384 }
385 }
386# endif
387
388 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
389 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
390 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
391
392#if defined(IN_GC) || defined(IN_RING0)
393 if (CTXALLSUFF(pCur->pfnHandler))
394 {
395 STAM_PROFILE_START(&pCur->Stat, h);
396 rc = pCur->CTXALLSUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, CTXALLSUFF(pCur->pvUser));
397 STAM_PROFILE_STOP(&pCur->Stat, h);
398 }
399 else
400#endif
401 rc = VINF_EM_RAW_EMULATE_INSTR;
402 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersPhysical);
403 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
404 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndPhys; });
405 return rc;
406 }
407 }
408# if PGM_WITH_PAGING(PGM_GST_TYPE)
409 else
410 {
411# ifdef PGM_SYNC_N_PAGES
412 /*
413 * If the region is write protected and we got a page not present fault, then sync
414 * the pages. If the fault was caused by a read, then restart the instruction.
415 * In case of write access continue to the GC write handler.
416 */
417 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
418 && !(uErr & X86_TRAP_PF_P))
419 {
420 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
421 if ( VBOX_FAILURE(rc)
422 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
423 || !(uErr & X86_TRAP_PF_RW))
424 {
425 AssertRC(rc);
426 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
427 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
428 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndVirt; });
429 return rc;
430 }
431 }
432# endif
433 /*
434 * Ok, it's a virtual page access handler.
435 *
436 * Since it's faster to search by address, we'll do that first
437 * and then retry by GCPhys if that fails.
438 */
439 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
440 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
441 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
442 */
443 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
444 if (pCur)
445 {
446 AssertMsg(!((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
447 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
448 || !(uErr & X86_TRAP_PF_P)
449 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
450 ("Unexpected trap for virtual handler: %VGv (phys=%VGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
451
452 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
453 && ( uErr & X86_TRAP_PF_RW
454 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
455 {
456# ifdef IN_GC
457 STAM_PROFILE_START(&pCur->Stat, h);
458 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
459 STAM_PROFILE_STOP(&pCur->Stat, h);
460# else
461 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
462# endif
463 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtual);
464 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
465 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
466 return rc;
467 }
468 /* Unhandled part of a monitored page */
469 }
470 else
471 {
472 /* Check by physical address. */
473 PPGMVIRTHANDLER pCur;
474 unsigned iPage;
475 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK),
476 &pCur, &iPage);
477 Assert(VBOX_SUCCESS(rc) || !pCur);
478 if ( pCur
479 && ( uErr & X86_TRAP_PF_RW
480 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
481 {
482 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
483# ifdef IN_GC
484 RTGCUINTPTR off = (iPage << PAGE_SHIFT) + ((RTGCUINTPTR)pvFault & PAGE_OFFSET_MASK) - ((RTGCUINTPTR)pCur->GCPtr & PAGE_OFFSET_MASK);
485 Assert(off < pCur->cb);
486 STAM_PROFILE_START(&pCur->Stat, h);
487 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, off);
488 STAM_PROFILE_STOP(&pCur->Stat, h);
489# else
490 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
491# endif
492 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualByPhys);
493 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
494 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
495 return rc;
496 }
497 }
498 }
499# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
500
501 /*
502 * There is a handled area of the page, but this fault doesn't belong to it.
503 * We must emulate the instruction.
504 *
505 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
506 * we first check if this was a page-not-present fault for a page with only
507 * write access handlers. Restart the instruction if it wasn't a write access.
508 */
509 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersUnhandled);
510
511 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
512 && !(uErr & X86_TRAP_PF_P))
513 {
514 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
515 if ( VBOX_FAILURE(rc)
516 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
517 || !(uErr & X86_TRAP_PF_RW))
518 {
519 AssertRC(rc);
520 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersOutOfSync);
521 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
522 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncHndPhys; });
523 return rc;
524 }
525 }
526
527 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
528 * It's writing to an unhandled part of the LDT page several million times.
529 */
530 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
531 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%RHp%s%s\n",
532 rc, pPage->HCPhys,
533 PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage) ? " phys" : "",
534 PGM_PAGE_HAS_ANY_VIRTUAL_HANDLERS(pPage) ? " virt" : ""));
535 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
536 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndUnhandled; });
537 return rc;
538 } /* if any kind of handler */
539
540# if PGM_WITH_PAGING(PGM_GST_TYPE)
541 if (uErr & X86_TRAP_PF_P)
542 {
543 /*
544 * The page isn't marked, but it might still be monitored by a virtual page access handler.
545 * (ASSUMES no temporary disabling of virtual handlers.)
546 */
547 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
548 * we should correct both the shadow page table and physical memory flags, and not only check for
549 * accesses within the handler region but for access to pages with virtual handlers. */
550 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&CTXSUFF(pVM->pgm.s.pTrees)->VirtHandlers, pvFault);
551 if (pCur)
552 {
553 AssertMsg( !((RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb)
554 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
555 || !(uErr & X86_TRAP_PF_P)
556 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
557 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
558
559 if ( (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr < pCur->cb
560 && ( uErr & X86_TRAP_PF_RW
561 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
562 {
563# ifdef IN_GC
564 STAM_PROFILE_START(&pCur->Stat, h);
565 rc = CTXSUFF(pCur->pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->GCPtr, (RTGCUINTPTR)pvFault - (RTGCUINTPTR)pCur->GCPtr);
566 STAM_PROFILE_STOP(&pCur->Stat, h);
567# else
568 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
569# endif
570 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersVirtualUnmarked);
571 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
572 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eHndVirt; });
573 return rc;
574 }
575 }
576 }
577# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
578 }
579 else
580 {
581 /* When the guest accesses invalid physical memory (e.g. probing of RAM or accessing a remapped MMIO range), we'll fall
582 * back to the recompiler to emulate the instruction.
583 */
584 LogFlow(("pgmPhysGetPageEx %VGp failed with %Vrc\n", GCPhys, rc));
585 STAM_COUNTER_INC(&pVM->pgm.s.StatHandlersInvalid);
586 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
587 return VINF_EM_RAW_EMULATE_INSTR;
588 }
589
590 STAM_PROFILE_STOP(&pVM->pgm.s.StatHandlers, b);
591
592# ifdef PGM_OUT_OF_SYNC_IN_GC
593 /*
594 * We are here only if page is present in Guest page tables and trap is not handled
595 * by our handlers.
596 * Check it for page out-of-sync situation.
597 */
598 STAM_PROFILE_START(&pVM->pgm.s.StatOutOfSync, c);
599
600 if (!(uErr & X86_TRAP_PF_P))
601 {
602 /*
603 * Page is not present in our page tables.
604 * Try to sync it!
605 * BTW, fPageShw is invalid in this branch!
606 */
607 if (uErr & X86_TRAP_PF_US)
608 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
609 else /* supervisor */
610 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
611
612# if defined(LOG_ENABLED) && !defined(IN_RING0)
613 RTGCPHYS GCPhys;
614 uint64_t fPageGst;
615 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
616 Log(("Page out of sync: %VGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%VGp scan=%d\n",
617 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
618# endif /* LOG_ENABLED */
619
620# if PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0)
621 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
622 {
623 uint64_t fPageGst;
624 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
625 if ( VBOX_SUCCESS(rc)
626 && !(fPageGst & X86_PTE_US))
627 {
628 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
629 if ( pvFault == (RTGCPTR)pRegFrame->eip
630 || (RTGCUINTPTR)pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
631# ifdef CSAM_DETECT_NEW_CODE_PAGES
632 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
633 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
634# endif /* CSAM_DETECT_NEW_CODE_PAGES */
635 )
636 {
637 LogFlow(("CSAMExecFault %VGv\n", pRegFrame->eip));
638 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
639 if (rc != VINF_SUCCESS)
640 {
641 /*
642 * CSAM needs to perform a job in ring 3.
643 *
644 * Sync the page before going to the host context; otherwise we'll end up in a loop if
645 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
646 */
647 LogFlow(("CSAM ring 3 job\n"));
648 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
649 AssertRC(rc2);
650
651 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
652 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eCSAM; });
653 return rc;
654 }
655 }
656# ifdef CSAM_DETECT_NEW_CODE_PAGES
657 else
658 if ( uErr == X86_TRAP_PF_RW
659 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
660 && pRegFrame->ecx < 0x10000
661 )
662 {
663 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
664 * to detect loading of new code pages.
665 */
666
667 /*
668 * Decode the instruction.
669 */
670 RTGCPTR PC;
671 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
672 if (rc == VINF_SUCCESS)
673 {
674 DISCPUSTATE Cpu;
675 uint32_t cbOp;
676 rc = EMInterpretDisasOneEx(pVM, (RTGCUINTPTR)PC, pRegFrame, &Cpu, &cbOp);
677
678 /* For now we'll restrict this to rep movsw/d instructions */
679 if ( rc == VINF_SUCCESS
680 && Cpu.pCurInstr->opcode == OP_MOVSWD
681 && (Cpu.prefix & PREFIX_REP))
682 {
683 CSAMMarkPossibleCodePage(pVM, pvFault);
684 }
685 }
686 }
687# endif /* CSAM_DETECT_NEW_CODE_PAGES */
688
689 /*
690 * Mark this page as safe.
691 */
692 /** @todo not correct for pages that contain both code and data!! */
693 Log2(("CSAMMarkPage %VGv; scanned=%d\n", pvFault, true));
694 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
695 }
696 }
697# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) && !defined(IN_RING0) */
698 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, PGM_SYNC_NR_PAGES, uErr);
699 if (VBOX_SUCCESS(rc))
700 {
701 /* The page was successfully synced, return to the guest. */
702 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
703 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSync; });
704 return VINF_SUCCESS;
705 }
706 }
707 else
708 {
709 /*
710 * A side effect of not flushing global PDEs is out-of-sync pages due
711 * to monitored physical regions that are no longer valid.
712 * Assume for now it only applies to the read/write flag
713 */
714 if (VBOX_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
715 {
716 if (uErr & X86_TRAP_PF_US)
717 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
718 else /* supervisor */
719 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
720
721
722 /*
723 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
724 */
725 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)pvFault, 1, uErr);
726 if (VBOX_SUCCESS(rc))
727 {
728 /*
729 * Page was successfully synced, return to guest.
730 */
731# ifdef VBOX_STRICT
732 RTGCPHYS GCPhys;
733 uint64_t fPageGst;
734 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
735 Assert(VBOX_SUCCESS(rc) && fPageGst & X86_PTE_RW);
736 LogFlow(("Obsolete physical monitor page out of sync %VGv - phys %VGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
737
738 uint64_t fPageShw;
739 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
740 AssertMsg(VBOX_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Vrc fPageShw=%VX64\n", rc, fPageShw));
741# endif /* VBOX_STRICT */
742 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
743 STAM_STATS({ pVM->pgm.s.CTXSUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatTrap0eOutOfSyncObsHnd; });
744 return VINF_SUCCESS;
745 }
746
747 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
748 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
749 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP|X86_CR0_PG)) == X86_CR0_PG)
750 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
751 {
752 uint64_t fPageGst;
753 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
754 if ( VBOX_SUCCESS(rc)
755 && !(fPageGst & X86_PTE_RW))
756 {
757 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
758 if (VBOX_SUCCESS(rc))
759 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulGC);
760 else
761 STAM_COUNTER_INC(&pVM->pgm.s.StatTrap0eWPEmulR3);
762 return rc;
763 }
764 else
765 AssertMsgFailed(("Unexpected r/w page %x flag=%x\n", pvFault, (uint32_t)fPageGst));
766 }
767
768 }
769
770# if PGM_WITH_PAGING(PGM_GST_TYPE)
771# ifdef VBOX_STRICT
772 /*
773 * Check for VMM page flags vs. Guest page flags consistency.
774 * Currently only for debug purposes.
775 */
776 if (VBOX_SUCCESS(rc))
777 {
778 /* Get guest page flags. */
779 uint64_t fPageGst;
780 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
781 if (VBOX_SUCCESS(rc))
782 {
783 uint64_t fPageShw;
784 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
785
786 /*
787 * Compare page flags.
788 * Note: we have AVL, A, D bits desynched.
789 */
790 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
791 ("Page flags mismatch! pvFault=%VGv GCPhys=%VGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
792 }
793 else
794 AssertMsgFailed(("PGMGstGetPage rc=%Vrc\n", rc));
795 }
796 else
797 AssertMsgFailed(("PGMGCGetPage rc=%Vrc\n", rc));
798# endif /* VBOX_STRICT */
799# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
800 }
801 STAM_PROFILE_STOP(&pVM->pgm.s.StatOutOfSync, c);
802# endif /* PGM_OUT_OF_SYNC_IN_GC */
803 }
804 else
805 {
806 /*
807 * Page not present in Guest OS or invalid page table address.
808 * This is potential virtual page access handler food.
809 *
810 * For the present we'll say that our access handlers don't
811 * work for this case - we've already discarded the page table
812 * not present case which is identical to this.
813 *
814 * When we perchance find we need this, we will probably have AVL
815 * trees (offset based) to operate on and we can measure their speed
816 * against mapping a page table and probably rearrange this handling
817 * a bit. (Like, searching virtual ranges before checking the
818 * physical address.)
819 */
820 }
821 }
822
823
824# if PGM_WITH_PAGING(PGM_GST_TYPE)
825 /*
826 * Conclusion, this is a guest trap.
827 */
828 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
829 STAM_COUNTER_INC(&pVM->pgm.s.StatGCTrap0eUnhandled);
830 return VINF_EM_RAW_GUEST_TRAP;
831# else
832 /* present, but not a monitored page; perhaps the guest is probing physical memory */
833 return VINF_EM_RAW_EMULATE_INSTR;
834# endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
835
836
837#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
838
839 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
840 return VERR_INTERNAL_ERROR;
841#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
842}
843
844
845/**
846 * Emulation of the invlpg instruction.
847 *
848 *
849 * @returns VBox status code.
850 *
851 * @param pVM VM handle.
852 * @param GCPtrPage Page to invalidate.
853 *
854 * @remark ASSUMES that the guest is updating before invalidating. This order
855 * isn't required by the CPU, so this is speculative and could cause
856 * trouble.
857 *
858 * @todo Flush page or page directory only if necessary!
859 * @todo Add a #define for simply invalidating the page.
860 */
861PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCUINTPTR GCPtrPage)
862{
863#if PGM_WITH_PAGING(PGM_GST_TYPE) \
864 && PGM_SHW_TYPE != PGM_TYPE_NESTED
865 int rc;
866
867 LogFlow(("InvalidatePage %VGv\n", GCPtrPage));
868 /*
869 * Get the shadow PD entry and skip out if this PD isn't present.
870 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
871 */
872# if PGM_SHW_TYPE == PGM_TYPE_32BIT
873 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
874 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
875# elif PGM_SHW_TYPE == PGM_TYPE_PAE
876 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT; /* no mask; flat index into the 2048 entry array. */
877 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT);
878 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs[0])->a[iPDDst];
879 PX86PDPT pPdptDst = pVM->pgm.s.CTXMID(p,PaePDPT);
880# else /* AMD64 */
881 /* PML4 */
882 AssertReturn(pVM->pgm.s.pHCPaePML4, VERR_INTERNAL_ERROR);
883
884 const unsigned iPml4e = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
885 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
886 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
887 PX86PDPAE pPDDst;
888 PX86PDPT pPdptDst;
889 PX86PML4E pPml4eDst = &pVM->pgm.s.pHCPaePML4->a[iPml4e];
890 rc = PGMShwGetLongModePDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
891 if (rc != VINF_SUCCESS)
892 {
893 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Vrc\n", rc));
894 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
895 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
896 PGM_INVL_GUEST_TLBS();
897 return VINF_SUCCESS;
898 }
899 Assert(pPDDst);
900
901 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
902 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpte];
903
904 if (!pPdpeDst->n.u1Present)
905 {
906 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
907 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
908 PGM_INVL_GUEST_TLBS();
909 return VINF_SUCCESS;
910 }
911
912# endif
913
914 const SHWPDE PdeDst = *pPdeDst;
915 if (!PdeDst.n.u1Present)
916 {
917 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePageSkipped));
918 return VINF_SUCCESS;
919 }
920
921 /*
922 * Get the guest PD entry and calc big page.
923 */
924# if PGM_GST_TYPE == PGM_TYPE_32BIT
925 PX86PD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
926 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
927 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
928# else
929 unsigned iPDSrc;
930# if PGM_GST_TYPE == PGM_TYPE_PAE
931 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
932# else /* AMD64 */
933 PX86PML4E pPml4eSrc;
934 X86PDPE PdpeSrc;
935 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
936# endif
937 GSTPDE PdeSrc;
938
939 if (pPDSrc)
940 PdeSrc = pPDSrc->a[iPDSrc];
941 else
942 PdeSrc.u = 0;
943# endif
944
945# if PGM_GST_TYPE == PGM_TYPE_AMD64
946 const bool fIsBigPage = PdeSrc.b.u1Size;
947# else
948 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
949# endif
950
951# ifdef IN_RING3
952 /*
953 * If a CR3 Sync is pending we may ignore the invalidate page operation
954 * depending on the kind of sync and if it's a global page or not.
955 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
956 */
957# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
958 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
959 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
960 && fIsBigPage
961 && PdeSrc.b.u1Global
962 )
963 )
964# else
965 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
966# endif
967 {
968 STAM_COUNTER_INC(&pVM->pgm.s.StatHCInvalidatePageSkipped);
969 return VINF_SUCCESS;
970 }
971# endif /* IN_RING3 */
972
973# if PGM_GST_TYPE == PGM_TYPE_AMD64
974 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
975
976 /* Fetch the pgm pool shadow descriptor. */
977 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
978 Assert(pShwPdpt);
979
980 /* Fetch the pgm pool shadow descriptor. */
981 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpte].u & SHW_PDPE_PG_MASK);
982 Assert(pShwPde);
983
984 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
985 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
986
987 if ( !pPml4eSrc->n.u1Present
988 || pShwPdpt->GCPhys != GCPhysPdpt)
989 {
990 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %VGv GCPhys=%VGp vs %VGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
991 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
992 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.pHCShwAmd64CR3->idx, iPml4e);
993 pPml4eDst->u = 0;
994 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
995 PGM_INVL_GUEST_TLBS();
996 return VINF_SUCCESS;
997 }
998 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
999 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1000 {
1001 /*
1002 * Mark not present so we can resync the PML4E when it's used.
1003 */
1004 LogFlow(("InvalidatePage: Out-of-sync PML4E at %VGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1005 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1006 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.pHCShwAmd64CR3->idx, iPml4e);
1007 pPml4eDst->u = 0;
1008 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1009 PGM_INVL_GUEST_TLBS();
1010 }
1011 else if (!pPml4eSrc->n.u1Accessed)
1012 {
1013 /*
1014 * Mark not present so we can set the accessed bit.
1015 */
1016 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %VGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1017 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1018 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.pHCShwAmd64CR3->idx, iPml4e);
1019 pPml4eDst->u = 0;
1020 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1021 PGM_INVL_GUEST_TLBS();
1022 }
1023
1024 /* Check if the PDPT entry has changed. */
1025 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1026 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1027 if ( !PdpeSrc.n.u1Present
1028 || pShwPde->GCPhys != GCPhysPd)
1029 {
1030 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %VGv GCPhys=%VGp vs %VGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1031 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1032 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpte);
1033 pPdpeDst->u = 0;
1034 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1035 PGM_INVL_GUEST_TLBS();
1036 return VINF_SUCCESS;
1037 }
1038 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1039 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1040 {
1041 /*
1042 * Mark not present so we can resync the PDPTE when it's used.
1043 */
1044 LogFlow(("InvalidatePage: Out-of-sync PDPE at %VGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1045 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1046 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpte);
1047 pPdpeDst->u = 0;
1048 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1049 PGM_INVL_GUEST_TLBS();
1050 }
1051 else if (!PdpeSrc.lm.u1Accessed)
1052 {
1053 /*
1054 * Mark not present so we can set the accessed bit.
1055 */
1056 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %VGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1057 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1058 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpte);
1059 pPdpeDst->u = 0;
1060 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1061 PGM_INVL_GUEST_TLBS();
1062 }
1063# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1064
1065# if PGM_GST_TYPE == PGM_TYPE_PAE
1066
1067# endif
1068
1069
1070 /*
1071 * Deal with the Guest PDE.
1072 */
1073 rc = VINF_SUCCESS;
1074 if (PdeSrc.n.u1Present)
1075 {
1076 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1077 {
1078 /*
1079 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1080 */
1081 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1082 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1083 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1084 }
1085 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1086 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1087 {
1088 /*
1089 * Mark not present so we can resync the PDE when it's used.
1090 */
1091 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1092 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1093# if PGM_GST_TYPE == PGM_TYPE_AMD64
1094 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1095# else
1096 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1097# endif
1098 pPdeDst->u = 0;
1099 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1100 PGM_INVL_GUEST_TLBS();
1101 }
1102 else if (!PdeSrc.n.u1Accessed)
1103 {
1104 /*
1105 * Mark not present so we can set the accessed bit.
1106 */
1107 LogFlow(("InvalidatePage: Out-of-sync (A) at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1108 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1109# if PGM_GST_TYPE == PGM_TYPE_AMD64
1110 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1111# else
1112 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1113# endif
1114 pPdeDst->u = 0;
1115 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNAs));
1116 PGM_INVL_GUEST_TLBS();
1117 }
1118 else if (!fIsBigPage)
1119 {
1120 /*
1121 * 4KB - page.
1122 */
1123 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1124 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1125# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1126 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1127 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1128# endif
1129 if (pShwPage->GCPhys == GCPhys)
1130 {
1131# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1132 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1133 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1134 if (pPT->a[iPTEDst].n.u1Present)
1135 {
1136# ifdef PGMPOOL_WITH_USER_TRACKING
1137 /* This is very unlikely with caching/monitoring enabled. */
1138 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1139# endif
1140 pPT->a[iPTEDst].u = 0;
1141 }
1142# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1143 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1144 if (VBOX_SUCCESS(rc))
1145 rc = VINF_SUCCESS;
1146# endif
1147 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4KBPages));
1148 PGM_INVL_PG(GCPtrPage);
1149 }
1150 else
1151 {
1152 /*
1153 * The page table address changed.
1154 */
1155 LogFlow(("InvalidatePage: Out-of-sync at %VGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%VGp iPDDst=%#x\n",
1156 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1157# if PGM_GST_TYPE == PGM_TYPE_AMD64
1158 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1159# else
1160 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1161# endif
1162 pPdeDst->u = 0;
1163 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDOutOfSync));
1164 PGM_INVL_GUEST_TLBS();
1165 }
1166 }
1167 else
1168 {
1169 /*
1170 * 2/4MB - page.
1171 */
1172 /* Before freeing the page, check if anything really changed. */
1173 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1174 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1175# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1176 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1177 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1178# endif
1179 if ( pShwPage->GCPhys == GCPhys
1180 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1181 {
1182 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1183 /** @todo PAT */
1184 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1185 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1186 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1187 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1188 {
1189 LogFlow(("Skipping flush for big page containing %VGv (PD=%X .u=%VX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1190 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPagesSkip));
1191 return VINF_SUCCESS;
1192 }
1193 }
1194
1195 /*
1196 * Ok, the page table is present and it's been changed in the guest.
1197 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1198 * We could do this for some flushes in GC too, but we need an algorithm for
1199 * deciding which 4MB pages contain code that is likely to be executed very soon.
1200 */
1201 LogFlow(("InvalidatePage: Out-of-sync PD at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1202 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1203# if PGM_GST_TYPE == PGM_TYPE_AMD64
1204 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1205# else
1206 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1207# endif
1208 pPdeDst->u = 0;
1209 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePage4MBPages));
1210 PGM_INVL_BIG_PG(GCPtrPage);
1211 }
1212 }
1213 else
1214 {
1215 /*
1216 * Page directory is not present, mark shadow PDE not present.
1217 */
1218 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1219 {
1220# if PGM_GST_TYPE == PGM_TYPE_AMD64
1221 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1222# else
1223 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1224# endif
1225 pPdeDst->u = 0;
1226 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDNPs));
1227 PGM_INVL_PG(GCPtrPage);
1228 }
1229 else
1230 {
1231 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1232 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,InvalidatePagePDMappings));
1233 }
1234 }
1235
1236 return rc;
1237
1238#else /* guest real and protected mode */
1239 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1240 return VINF_SUCCESS;
1241#endif
1242}
1243
1244
1245#ifdef PGMPOOL_WITH_USER_TRACKING
1246/**
1247 * Update the tracking of shadowed pages.
1248 *
1249 * @param pVM The VM handle.
1250 * @param pShwPage The shadow page.
1251 * @param HCPhys The physical page that is being dereferenced.
1252 */
1253DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1254{
1255# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1256 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1257 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%VHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1258
1259 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1260 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1261 * 2. write protect all shadowed pages. I.e. implement caching.
1262 */
1263 /*
1264 * Find the guest address.
1265 */
1266 for (PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
1267 pRam;
1268 pRam = CTXALLSUFF(pRam->pNext))
1269 {
1270 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1271 while (iPage-- > 0)
1272 {
1273 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1274 {
1275 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1276 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1277 pShwPage->cPresent--;
1278 pPool->cPresent--;
1279 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1280 return;
1281 }
1282 }
1283 }
1284
1285 for (;;)
1286 AssertReleaseMsgFailed(("HCPhys=%VHp wasn't found!\n", HCPhys));
1287# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1288 pShwPage->cPresent--;
1289 pVM->pgm.s.CTXSUFF(pPool)->cPresent--;
1290# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1291}
1292
1293
1294/**
1295 * Update the tracking of shadowed pages.
1296 *
1297 * @param pVM The VM handle.
1298 * @param pShwPage The shadow page.
1299 * @param u16 The top 16 bits of pPage->HCPhys.
1300 * @param pPage Pointer to the guest page. This will be modified.
1301 * @param iPTDst The index into the shadow table.
1302 */
1303DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1304{
1305# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1306 /*
1307 * We're making certain assumptions about the placement of cRef and idx.
1308 */
1309 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1310 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1311
1312 /*
1313 * Just deal with the simple first-time case here.
1314 */
1315 if (!u16)
1316 {
1317 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1318 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1319 }
1320 else
1321 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1322
1323 /* write back, trying to be clever... */
1324 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%VHp->%VHp iPTDst=%#x\n",
1325 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1326 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1327# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1328
1329 /* update statistics. */
1330 pVM->pgm.s.CTXSUFF(pPool)->cPresent++;
1331 pShwPage->cPresent++;
1332 if (pShwPage->iFirstPresent > iPTDst)
1333 pShwPage->iFirstPresent = iPTDst;
1334}
1335#endif /* PGMPOOL_WITH_USER_TRACKING */
1336
1337
1338/**
1339 * Creates a 4K shadow page for a guest page.
1340 *
1341 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1342 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1343 * will be mapped in this function.
1344 *
1345 * @param pVM VM handle.
1346 * @param pPteDst Destination page table entry.
1347 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1348 * Can safely assume that only the flags are being used.
1349 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1350 * @param pShwPage Pointer to the shadow page.
1351 * @param iPTDst The index into the shadow table.
1352 *
1353 * @remark Not used for 2/4MB pages!
1354 */
1355DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1356{
1357 if (PteSrc.n.u1Present)
1358 {
1359 /*
1360 * Find the ram range.
1361 */
1362 PPGMPAGE pPage;
1363 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1364 if (VBOX_SUCCESS(rc))
1365 {
1366 /** @todo investigate PWT, PCD and PAT. */
1367 /*
1368 * Make page table entry.
1369 */
1370 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1371 SHWPTE PteDst;
1372 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1373 {
1374 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1375 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1376 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1377 | (HCPhys & X86_PTE_PAE_PG_MASK);
1378 else
1379 {
1380 LogFlow(("SyncPageWorker: monitored page (%VGp) -> mark not present\n", HCPhys));
1381 PteDst.u = 0;
1382 }
1383 /** @todo count these two kinds. */
1384 }
1385 else
1386 {
1387 /*
1388 * If the page or page directory entry is not marked accessed,
1389 * we mark the page not present.
1390 */
1391 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1392 {
1393 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1394 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,AccessedPage));
1395 PteDst.u = 0;
1396 }
1397 else
1398 /*
1399 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1400 * when the page is modified.
1401 */
1402 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1403 {
1404 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPage));
1405 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1406 | (HCPhys & X86_PTE_PAE_PG_MASK)
1407 | PGM_PTFLAGS_TRACK_DIRTY;
1408 }
1409 else
1410 {
1411 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageSkipped));
1412 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1413 | (HCPhys & X86_PTE_PAE_PG_MASK);
1414 }
1415 }
1416
1417#ifdef PGMPOOL_WITH_USER_TRACKING
1418 /*
1419 * Keep user track up to date.
1420 */
1421 if (PteDst.n.u1Present)
1422 {
1423 if (!pPteDst->n.u1Present)
1424 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1425 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1426 {
1427 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1428 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1429 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1430 }
1431 }
1432 else if (pPteDst->n.u1Present)
1433 {
1434 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1435 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1436 }
1437#endif /* PGMPOOL_WITH_USER_TRACKING */
1438
1439 /*
1440 * Update statistics and commit the entry.
1441 */
1442 if (!PteSrc.n.u1Global)
1443 pShwPage->fSeenNonGlobal = true;
1444 *pPteDst = PteDst;
1445 }
1446 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1447 /** @todo count these. */
1448 }
1449 else
1450 {
1451 /*
1452 * Page not-present.
1453 */
1454 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1455#ifdef PGMPOOL_WITH_USER_TRACKING
1456 /* Keep user track up to date. */
1457 if (pPteDst->n.u1Present)
1458 {
1459 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1460 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1461 }
1462#endif /* PGMPOOL_WITH_USER_TRACKING */
1463 pPteDst->u = 0;
1464 /** @todo count these. */
1465 }
1466}
1467
1468
1469/**
1470 * Syncs a guest OS page.
1471 *
1472 * There are no conflicts at this point, neither is there any need for
1473 * page table allocations.
1474 *
1475 * @returns VBox status code.
1476 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1477 * @param pVM VM handle.
1478 * @param PdeSrc Page directory entry of the guest.
1479 * @param GCPtrPage Guest context page address.
1480 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1481 * @param uErr Fault error (X86_TRAP_PF_*).
1482 */
1483PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCUINTPTR GCPtrPage, unsigned cPages, unsigned uErr)
1484{
1485 LogFlow(("SyncPage: GCPtrPage=%VGv cPages=%d uErr=%#x\n", GCPtrPage, cPages, uErr));
1486
1487#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1488 || PGM_GST_TYPE == PGM_TYPE_PAE \
1489 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1490 && PGM_SHW_TYPE != PGM_TYPE_NESTED
1491
1492# if PGM_WITH_NX(PGM_GST_TYPE)
1493 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1494# endif
1495
1496 /*
1497 * Assert preconditions.
1498 */
1499 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1500 Assert(PdeSrc.n.u1Present);
1501 Assert(cPages);
1502
1503 /*
1504 * Get the shadow PDE, find the shadow page table in the pool.
1505 */
1506# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1507 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1508 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1509# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1510 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1511 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT); /* no mask; flat index into the 2048 entry array. */
1512 PX86PDPT pPdptDst = pVM->pgm.s.CTXMID(p,PaePDPT);
1513 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1514# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1515 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1516 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1517 PX86PDPAE pPDDst;
1518 X86PDEPAE PdeDst;
1519 PX86PDPT pPdptDst;
1520
1521 int rc = PGMShwGetLongModePDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
1522 AssertRCReturn(rc, rc);
1523 Assert(pPDDst && pPdptDst);
1524 PdeDst = pPDDst->a[iPDDst];
1525# endif
1526 Assert(PdeDst.n.u1Present);
1527 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1528
1529# if PGM_GST_TYPE == PGM_TYPE_AMD64
1530 /* Fetch the pgm pool shadow descriptor. */
1531 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpte].u & X86_PDPE_PG_MASK);
1532 Assert(pShwPde);
1533# endif
1534
1535 /*
1536 * Check that the page is present and that the shadow PDE isn't out of sync.
1537 */
1538# if PGM_GST_TYPE == PGM_TYPE_AMD64
1539 const bool fBigPage = PdeSrc.b.u1Size;
1540# else
1541 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1542# endif
1543 RTGCPHYS GCPhys;
1544 if (!fBigPage)
1545 {
1546 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1547# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1548 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1549 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1550# endif
1551 }
1552 else
1553 {
1554 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
1555# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1556 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1557 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1558# endif
1559 }
1560 if ( pShwPage->GCPhys == GCPhys
1561 && PdeSrc.n.u1Present
1562 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1563 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1564# if PGM_WITH_NX(PGM_GST_TYPE)
1565 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1566# endif
1567 )
1568 {
1569 /*
1570 * Check that the PDE is marked accessed already.
1571 * Since we set the accessed bit *before* getting here on a #PF, this
1572 * check is only meant for dealing with non-#PF'ing paths.
1573 */
1574 if (PdeSrc.n.u1Accessed)
1575 {
1576 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1577 if (!fBigPage)
1578 {
1579 /*
1580 * 4KB Page - Map the guest page table.
1581 */
1582 PGSTPT pPTSrc;
1583 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1584 if (VBOX_SUCCESS(rc))
1585 {
1586# ifdef PGM_SYNC_N_PAGES
1587 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1588 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1589 {
1590 /*
1591 * This code path is currently only taken when the caller is PGMTrap0eHandler
1592 * for non-present pages!
1593 *
1594 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1595 * deal with locality.
1596 */
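                    /*
                     * A small standalone sketch of the window clamping performed below;
                     * cWindow stands for PGM_SYNC_NR_PAGES (treated as an arbitrary even
                     * count) and cPTEs for the number of entries in the shadow page table
                     * (512 for PAE/long mode, 1024 for plain 32-bit):
                     *
                     *   static void CalcSyncWindow(unsigned iFault, unsigned cWindow, unsigned cPTEs,
                     *                              unsigned *piFirst, unsigned *piEnd)
                     *   {
                     *       unsigned iEnd = iFault + cWindow / 2;
                     *       *piEnd   = iEnd < cPTEs ? iEnd : cPTEs;              // RT_MIN()
                     *       *piFirst = iFault < cWindow / 2 ? 0 : iFault - cWindow / 2;
                     *   }
                     *
                     * E.g. with cWindow=8 a fault on PTE 2 gives the window [0,6), and a fault
                     * on PTE 510 of a 512 entry table gives [506,512).
                     */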
1597 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1598# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1599 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1600 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1601# else
1602 const unsigned offPTSrc = 0;
1603# endif
1604 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1605 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1606 iPTDst = 0;
1607 else
1608 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1609 for (; iPTDst < iPTDstEnd; iPTDst++)
1610 {
1611 if (!pPTDst->a[iPTDst].n.u1Present)
1612 {
1613 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1614 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1615 NOREF(GCPtrCurPage);
1616#ifndef IN_RING0
1617 /*
1618 * Assuming kernel code will be marked as supervisor (not user level), executed
1619 * using a conforming code selector, and marked as read-only.
1620 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1621 */
1622 PPGMPAGE pPage;
1623 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1624 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1625 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1626 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1627 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1628 )
1629#endif /* else: CSAM not active */
1630 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1631 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1632 GCPtrCurPage, PteSrc.n.u1Present,
1633 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1634 PteSrc.n.u1User & PdeSrc.n.u1User,
1635 (uint64_t)PteSrc.u,
1636 (uint64_t)pPTDst->a[iPTDst].u,
1637 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1638 }
1639 }
1640 }
1641 else
1642# endif /* PGM_SYNC_N_PAGES */
1643 {
1644 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1645 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1646 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1647 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1648 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1649 GCPtrPage, PteSrc.n.u1Present,
1650 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1651 PteSrc.n.u1User & PdeSrc.n.u1User,
1652 (uint64_t)PteSrc.u,
1653 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1654 }
1655 }
1656 else /* MMIO or invalid page: emulated in #PF handler. */
1657 {
1658 LogFlow(("PGM_GCPHYS_2_PTR %VGp failed with %Vrc\n", GCPhys, rc));
1659 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1660 }
1661 }
1662 else
1663 {
1664 /*
1665 * 4/2MB page - lazy syncing shadow 4K pages.
1666 * (There are many causes of getting here, it's no longer only CSAM.)
1667 */
1668 /* Calculate the GC physical address of this 4KB shadow page. */
1669 RTGCPHYS GCPhys = (PdeSrc.u & GST_PDE_BIG_PG_MASK) | ((RTGCUINTPTR)GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
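                /*
                 * A worked example of the address derivation above, assuming a 32-bit guest
                 * with plain (non-PAE) 4MB pages, i.e. the PDE supplies physical bits 22-31
                 * and the faulting address supplies bits 12-21: PdeSrc.u=0x00c000e3 (base
                 * 0x00c00000, P/RW/A/D/PS set) and GCPtrPage=0xc0453000 yield
                 * GCPhys = 0x00c00000 | 0x00053000 = 0x00c53000, the 4KB slice of the big
                 * page that backs the faulting address.
                 */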
1670 /* Find ram range. */
1671 PPGMPAGE pPage;
1672 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1673 if (VBOX_SUCCESS(rc))
1674 {
1675 /*
1676 * Make shadow PTE entry.
1677 */
1678 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1679 SHWPTE PteDst;
1680 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1681 | (HCPhys & X86_PTE_PAE_PG_MASK);
1682 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1683 {
1684 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1685 PteDst.n.u1Write = 0;
1686 else
1687 PteDst.u = 0;
1688 }
1689 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1690# ifdef PGMPOOL_WITH_USER_TRACKING
1691 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1692 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1693# endif
1694 pPTDst->a[iPTDst] = PteDst;
1695
1696
1697 /*
1698 * If the page is not flagged as dirty and is writable, then make it read-only
1699 * at PD level, so we can set the dirty bit when the page is modified.
1700 *
1701 * ASSUMES that page access handlers are implemented on page table entry level.
1702 * Thus we will first catch the dirty access and set PDE.D and restart. If
1703 * there is an access handler, we'll trap again and let it work on the problem.
1704 */
1705 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1706 * As for invlpg, it simply frees the whole shadow PT.
1707 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1708 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1709 {
1710 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
1711 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1712 PdeDst.n.u1Write = 0;
1713 }
1714 else
1715 {
1716 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1717 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1718 }
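                /*
                 * The PD level variant of the dirty-tracking trick, as a standalone sketch
                 * (not PGM code; PDE_TRACK_DIRTY is a hypothetical software-available bit
                 * standing in for PGM_PDFLAGS_TRACK_DIRTY):
                 *
                 *   #include <stdint.h>
                 *   #define PDE_RW          UINT64_C(0x02)
                 *   #define PDE_D           UINT64_C(0x40)
                 *   #define PDE_TRACK_DIRTY UINT64_C(0x200)
                 *
                 *   // Write-protect the shadow PDE of a clean, writable guest big page so the
                 *   // first write traps; otherwise mirror the guest RW bit and drop the marker.
                 *   static uint64_t TrackBigPageDirty(uint64_t ShwPde, uint64_t GstPde)
                 *   {
                 *       if ((GstPde & PDE_RW) && !(GstPde & PDE_D))
                 *           return (ShwPde & ~PDE_RW) | PDE_TRACK_DIRTY;
                 *       return (ShwPde & ~(PDE_RW | PDE_TRACK_DIRTY)) | (GstPde & PDE_RW);
                 *   }
                 */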
1719# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1720 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst] = PdeDst;
1721# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1722 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst] = PdeDst;
1723# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1724 pPDDst->a[iPDDst] = PdeDst;
1725# endif
1726 Log2(("SyncPage: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%VGp%s\n",
1727 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1728 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1729 }
1730 else
1731 LogFlow(("PGM_GCPHYS_2_PTR %VGp (big) failed with %Vrc\n", GCPhys, rc));
1732 }
1733 return VINF_SUCCESS;
1734 }
1735 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDNAs));
1736 }
1737 else
1738 {
1739 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPagePDOutOfSync));
1740 Log2(("SyncPage: Out-Of-Sync PDE at %VGp PdeSrc=%RX64 PdeDst=%RX64\n",
1741 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1742 }
1743
1744 /*
1745 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1746 * Yea, I'm lazy.
1747 */
1748 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1749# if PGM_GST_TYPE == PGM_TYPE_AMD64
1750 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1751# else
1752 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPDDst);
1753# endif
1754
1755# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1756 pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst].u = 0;
1757# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1758 pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst].u = 0;
1759# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1760 pPDDst->a[iPDDst].u = 0;
1761# endif
1762 PGM_INVL_GUEST_TLBS();
1763 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1764
1765#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1766 && PGM_SHW_TYPE != PGM_TYPE_NESTED
1767
1768# ifdef PGM_SYNC_N_PAGES
1769 /*
1770 * Get the shadow PDE, find the shadow page table in the pool.
1771 */
1772# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1773 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
1774 X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[iPDDst];
1775# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1776 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT; /* no mask; flat index into the 2048 entry array. */
1777 X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[iPDDst];
1778# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1779 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1780 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1781 PX86PDPAE pPDDst;
1782 X86PDEPAE PdeDst;
1783 PX86PDPT pPdptDst;
1784
1785 int rc = PGMShwGetLongModePDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
1786 AssertRCReturn(rc, rc);
1787 Assert(pPDDst && pPdptDst);
1788 PdeDst = pPDDst->a[iPDDst];
1789# endif
1790 Assert(PdeDst.n.u1Present);
1791 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1792 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1793
1794 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1795 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1796 {
1797 /*
1798 * This code path is currently only taken when the caller is PGMTrap0eHandler
1799 * for non-present pages!
1800 *
1801 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1802 * deal with locality.
1803 */
1804 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1805 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
1806 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1807 iPTDst = 0;
1808 else
1809 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1810 for (; iPTDst < iPTDstEnd; iPTDst++)
1811 {
1812 if (!pPTDst->a[iPTDst].n.u1Present)
1813 {
1814 GSTPTE PteSrc;
1815
1816 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1817
1818 /* Fake the page table entry */
1819 PteSrc.u = GCPtrCurPage;
1820 PteSrc.n.u1Present = 1;
1821 PteSrc.n.u1Dirty = 1;
1822 PteSrc.n.u1Accessed = 1;
1823 PteSrc.n.u1Write = 1;
1824 PteSrc.n.u1User = 1;
1825
1826 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1827
1828 Log2(("SyncPage: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1829 GCPtrCurPage, PteSrc.n.u1Present,
1830 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1831 PteSrc.n.u1User & PdeSrc.n.u1User,
1832 (uint64_t)PteSrc.u,
1833 (uint64_t)pPTDst->a[iPTDst].u,
1834 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1835 }
1836 else
1837 Log4(("%VGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
1838 }
1839 }
1840 else
1841# endif /* PGM_SYNC_N_PAGES */
1842 {
1843 GSTPTE PteSrc;
1844 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1845 RTGCUINTPTR GCPtrCurPage = ((RTGCUINTPTR)GCPtrPage & ~(RTGCUINTPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1846
1847 /* Fake the page table entry */
1848 PteSrc.u = GCPtrCurPage;
1849 PteSrc.n.u1Present = 1;
1850 PteSrc.n.u1Dirty = 1;
1851 PteSrc.n.u1Accessed = 1;
1852 PteSrc.n.u1Write = 1;
1853 PteSrc.n.u1User = 1;
1854 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1855
1856 Log2(("SyncPage: 4K %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
1857 GCPtrPage, PteSrc.n.u1Present,
1858 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1859 PteSrc.n.u1User & PdeSrc.n.u1User,
1860 (uint64_t)PteSrc.u,
1861 (uint64_t)pPTDst->a[iPTDst].u,
1862 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1863 }
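    /*
     * A standalone sketch of the identity mapping faked above for guests that run
     * without paging (real/protected mode); only the architectural P/RW/US/A/D bits
     * are assumed and the "guest physical" address is simply the page address itself:
     *
     *   #include <stdint.h>
     *   static uint64_t FakeGuestPte(uint64_t GCPtrPage)
     *   {
     *       const uint64_t fFlags = UINT64_C(0x01)     // P
     *                             | UINT64_C(0x02)     // RW
     *                             | UINT64_C(0x04)     // US
     *                             | UINT64_C(0x20)     // A
     *                             | UINT64_C(0x40);    // D
     *       return (GCPtrPage & ~UINT64_C(0xfff)) | fFlags;
     *   }
     */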
1864 return VINF_SUCCESS;
1865
1866#else
1867 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1868 return VERR_INTERNAL_ERROR;
1869#endif
1870}
1871
1872
1873
1874#if PGM_WITH_PAGING(PGM_GST_TYPE)
1875
1876/**
1877 * Investigate page fault and handle write protection page faults caused by
1878 * dirty bit tracking.
1879 *
1880 * @returns VBox status code.
1881 * @param pVM VM handle.
1882 * @param uErr Page fault error code.
1883 * @param pPdeDst Shadow page directory entry.
1884 * @param pPdeSrc Guest page directory entry.
1885 * @param GCPtrPage Guest context page address.
1886 */
1887PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCUINTPTR GCPtrPage)
1888{
1889 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
1890 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
1891 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
1892# if PGM_GST_TYPE == PGM_TYPE_AMD64
1893 bool fBigPagesSupported = true;
1894# else
1895 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1896# endif
1897# if PGM_WITH_NX(PGM_GST_TYPE)
1898 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1899# endif
1900 unsigned uPageFaultLevel;
1901 int rc;
1902
1903 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
1904 LogFlow(("CheckPageFault: GCPtrPage=%VGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
1905
1906# if PGM_GST_TYPE == PGM_TYPE_PAE \
1907 || PGM_GST_TYPE == PGM_TYPE_AMD64
1908
1909# if PGM_GST_TYPE == PGM_TYPE_AMD64
1910 PX86PML4E pPml4eSrc;
1911 PX86PDPE pPdpeSrc;
1912
1913 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
1914 Assert(pPml4eSrc);
1915
1916 /*
1917 * Real page fault? (PML4E level)
1918 */
1919 if ( (uErr & X86_TRAP_PF_RSVD)
1920 || !pPml4eSrc->n.u1Present
1921 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
1922 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1923 || (fUserLevelFault && !pPml4eSrc->n.u1User)
1924 )
1925 {
1926 uPageFaultLevel = 0;
1927 goto UpperLevelPageFault;
1928 }
1929 Assert(pPdpeSrc);
1930
1931# else /* PAE */
1932 PX86PDPE pPdpeSrc = &pVM->pgm.s.CTXSUFF(pGstPaePDPT)->a[(GCPtrPage >> GST_PDPT_SHIFT) & GST_PDPT_MASK];
1933# endif
1934
1935 /*
1936 * Real page fault? (PDPE level)
1937 */
1938 if ( (uErr & X86_TRAP_PF_RSVD)
1939 || !pPdpeSrc->n.u1Present
1940# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
1941 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
1942 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
1943 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
1944# endif
1945 )
1946 {
1947 uPageFaultLevel = 1;
1948 goto UpperLevelPageFault;
1949 }
1950# endif
1951
1952 /*
1953 * Real page fault? (PDE level)
1954 */
1955 if ( (uErr & X86_TRAP_PF_RSVD)
1956 || !pPdeSrc->n.u1Present
1957# if PGM_WITH_NX(PGM_GST_TYPE)
1958 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
1959# endif
1960 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
1961 || (fUserLevelFault && !pPdeSrc->n.u1User) )
1962 {
1963 uPageFaultLevel = 2;
1964 goto UpperLevelPageFault;
1965 }
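    /*
     * For reference, a standalone sketch of the architectural #PF error code bits that
     * the checks above and below rely on (values match the X86_TRAP_PF_* masks):
     *
     *   #include <stdint.h>
     *   #include <stdbool.h>
     *   typedef struct PFERRBITS
     *   {
     *       bool fPresent;    // set: protection violation, clear: not-present page
     *       bool fWrite;      // set: write access, clear: read access
     *       bool fUser;       // set: user mode access
     *       bool fRsvd;       // set: reserved bit violation in a paging structure
     *       bool fInstrFetch; // set: instruction fetch (NX)
     *   } PFERRBITS;
     *
     *   static PFERRBITS DecodePfErrorCode(uint32_t uErr)
     *   {
     *       PFERRBITS Bits;
     *       Bits.fPresent    = (uErr & UINT32_C(0x01)) != 0;   // X86_TRAP_PF_P
     *       Bits.fWrite      = (uErr & UINT32_C(0x02)) != 0;   // X86_TRAP_PF_RW
     *       Bits.fUser       = (uErr & UINT32_C(0x04)) != 0;   // X86_TRAP_PF_US
     *       Bits.fRsvd       = (uErr & UINT32_C(0x08)) != 0;   // X86_TRAP_PF_RSVD
     *       Bits.fInstrFetch = (uErr & UINT32_C(0x10)) != 0;   // X86_TRAP_PF_ID
     *       return Bits;
     *   }
     */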
1966
1967 /*
1968 * First check the easy case where the page directory has been marked read-only to track
1969 * the dirty bit of an emulated BIG page
1970 */
1971 if (pPdeSrc->b.u1Size && fBigPagesSupported)
1972 {
1973 /* Mark guest page directory as accessed */
1974# if PGM_GST_TYPE == PGM_TYPE_AMD64
1975 pPml4eSrc->n.u1Accessed = 1;
1976 pPdpeSrc->lm.u1Accessed = 1;
1977# endif
1978 pPdeSrc->b.u1Accessed = 1;
1979
1980 /*
1981 * Only write protection page faults are relevant here.
1982 */
1983 if (fWriteFault)
1984 {
1985 /* Mark guest page directory as dirty (BIG page only). */
1986 pPdeSrc->b.u1Dirty = 1;
1987
1988 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
1989 {
1990 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
1991
1992 Assert(pPdeSrc->b.u1Write);
1993
1994 pPdeDst->n.u1Write = 1;
1995 pPdeDst->n.u1Accessed = 1;
1996 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1997 PGM_INVL_BIG_PG(GCPtrPage);
1998 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
1999 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2000 }
2001 }
2002 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2003 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2004 }
2005 /* else: 4KB page table */
2006
2007 /*
2008 * Map the guest page table.
2009 */
2010 PGSTPT pPTSrc;
2011 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2012 if (VBOX_SUCCESS(rc))
2013 {
2014 /*
2015 * Real page fault?
2016 */
2017 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2018 const GSTPTE PteSrc = *pPteSrc;
2019 if ( !PteSrc.n.u1Present
2020# if PGM_WITH_NX(PGM_GST_TYPE)
2021 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2022# endif
2023 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2024 || (fUserLevelFault && !PteSrc.n.u1User)
2025 )
2026 {
2027# ifdef IN_GC
2028 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
2029# endif
2030 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2031 LogFlow(("CheckPageFault: real page fault at %VGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2032
2033 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2034 * See the 2nd case above as well.
2035 */
2036 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2037 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2038
2039 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2040 return VINF_EM_RAW_GUEST_TRAP;
2041 }
2042 LogFlow(("CheckPageFault: page fault at %VGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2043
2044 /*
2045 * Set the accessed bits in the page directory and the page table.
2046 */
2047# if PGM_GST_TYPE == PGM_TYPE_AMD64
2048 pPml4eSrc->n.u1Accessed = 1;
2049 pPdpeSrc->lm.u1Accessed = 1;
2050# endif
2051 pPdeSrc->n.u1Accessed = 1;
2052 pPteSrc->n.u1Accessed = 1;
2053
2054 /*
2055 * Only write protection page faults are relevant here.
2056 */
2057 if (fWriteFault)
2058 {
2059 /* Write access, so mark guest entry as dirty. */
2060# if defined(IN_GC) && defined(VBOX_WITH_STATISTICS)
2061 if (!pPteSrc->n.u1Dirty)
2062 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtiedPage);
2063 else
2064 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageAlreadyDirty);
2065# endif
2066
2067 pPteSrc->n.u1Dirty = 1;
2068
2069 if (pPdeDst->n.u1Present)
2070 {
2071 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2072 * Our individual shadow handlers will provide more information and force a fatal exit.
2073 */
2074 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2075 {
2076 LogRel(("CheckPageFault: write to hypervisor region %VGv\n", GCPtrPage));
2077 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2078 return VINF_SUCCESS;
2079 }
2080
2081 /*
2082 * Map shadow page table.
2083 */
2084 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2085 if (pShwPage)
2086 {
2087 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2088 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2089 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2090 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2091 {
2092 LogFlow(("DIRTY page trap addr=%VGv\n", GCPtrPage));
2093# ifdef VBOX_STRICT
2094 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2095 if (pPage)
2096 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2097 ("Unexpected dirty bit tracking on monitored page %VGv (phys %VGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2098# endif
2099 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageTrap));
2100
2101 Assert(pPteSrc->n.u1Write);
2102
2103 pPteDst->n.u1Write = 1;
2104 pPteDst->n.u1Dirty = 1;
2105 pPteDst->n.u1Accessed = 1;
2106 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2107 PGM_INVL_PG(GCPtrPage);
2108
2109 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2110 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2111 }
2112 }
2113 else
2114 AssertMsgFailed(("pgmPoolGetPageByHCPhys %VGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2115 }
2116 }
2117/** @todo Optimize accessed bit emulation? */
2118# ifdef VBOX_STRICT
2119 /*
2120 * Sanity check.
2121 */
2122 else if ( !pPteSrc->n.u1Dirty
2123 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2124 && pPdeDst->n.u1Present)
2125 {
2126 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2127 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2128 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2129 if ( pPteDst->n.u1Present
2130 && pPteDst->n.u1Write)
2131 LogFlow(("Writable present page %VGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2132 }
2133# endif /* VBOX_STRICT */
2134 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2135 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2136 }
2137 AssertRC(rc);
2138 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,DirtyBitTracking), a);
2139 return rc;
2140
2141
2142UpperLevelPageFault:
2143 /* Page fault detected while checking the PML4E, PDPE or PDE.
2144 * Single exit handler to get rid of duplicate code paths.
2145 */
2146# ifdef IN_GC
2147 STAM_COUNTER_INC(&pVM->pgm.s.StatGCDirtyTrackRealPF);
2148# endif
2149 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat, DirtyBitTracking), a);
2150 LogFlow(("CheckPageFault: real page fault at %VGv (%d)\n", GCPtrPage, uPageFaultLevel));
2151
2152 if (
2153# if PGM_GST_TYPE == PGM_TYPE_AMD64
2154 pPml4eSrc->n.u1Present &&
2155# endif
2156# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2157 pPdpeSrc->n.u1Present &&
2158# endif
2159 pPdeSrc->n.u1Present)
2160 {
2161 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2162 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2163 {
2164 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2165 }
2166 else
2167 {
2168 /*
2169 * Map the guest page table.
2170 */
2171 PGSTPT pPTSrc;
2172 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2173 if (VBOX_SUCCESS(rc))
2174 {
2175 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2176 const GSTPTE PteSrc = *pPteSrc;
2177 if (pPteSrc->n.u1Present)
2178 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2179 }
2180 AssertRC(rc);
2181 }
2182 }
2183 return VINF_EM_RAW_GUEST_TRAP;
2184}
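/*
 * A condensed standalone sketch of the lazy dirty-bit protocol that SyncPageWorker
 * and CheckPageFault implement together (not literal PGM code; PTE_TRACK_DIRTY is a
 * hypothetical software-available bit):
 *
 *   1) SyncPage/SyncPT see a writable guest PTE with D=0 and emit a shadow PTE with
 *      RW=0 plus the tracking marker.
 *   2) The guest writes the page and faults on the read-only shadow PTE.
 *   3) CheckPageFault finds the marker, decides it is not a real guest fault and
 *      resolves it by bookkeeping only:
 *
 *   #include <stdint.h>
 *   #define PTE_RW          UINT64_C(0x02)
 *   #define PTE_A           UINT64_C(0x20)
 *   #define PTE_D           UINT64_C(0x40)
 *   #define PTE_TRACK_DIRTY UINT64_C(0x200)
 *
 *   static void ResolveDirtyTrackingFault(uint64_t *pGstPte, uint64_t *pShwPte)
 *   {
 *       *pGstPte |= PTE_A | PTE_D;                   // guest A/D bookkeeping
 *       *pShwPte  = (*pShwPte & ~PTE_TRACK_DIRTY)    // drop the marker ...
 *                 | PTE_RW | PTE_A | PTE_D;          // ... lift the write protection
 *   }
 *
 * The TLB entry for the page is then invalidated and the faulting instruction is
 * restarted (VINF_PGM_HANDLED_DIRTY_BIT_FAULT).
 */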
2185
2186#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2187
2188
2189/**
2190 * Sync a shadow page table.
2191 *
2192 * The shadow page table is not present. This includes the case where
2193 * there is a conflict with a mapping.
2194 *
2195 * @returns VBox status code.
2196 * @param pVM VM handle.
2197 * @param iPDSrc Page directory index.
2198 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2199 * Assume this is a temporary mapping.
2200 * @param GCPtrPage GC Pointer of the page that caused the fault
2201 */
2202PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCUINTPTR GCPtrPage)
2203{
2204 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2205 STAM_COUNTER_INC(&pVM->pgm.s.StatGCSyncPtPD[iPDSrc]);
2206 LogFlow(("SyncPT: GCPtrPage=%VGv\n", GCPtrPage));
2207
2208#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2209 || PGM_GST_TYPE == PGM_TYPE_PAE \
2210 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2211 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2212
2213 int rc = VINF_SUCCESS;
2214
2215 /*
2216 * Validate input a little bit.
2217 */
2218 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%VGv\n", iPDSrc, GCPtrPage));
2219# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2220 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2221 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2222# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2223 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT; /* no mask; flat index into the 2048 entry array. */
2224 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT);
2225 PX86PDPT pPdptDst = pVM->pgm.s.CTXMID(p,PaePDPT);
2226 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2227# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2228 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2229 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2230 PX86PDPAE pPDDst;
2231 PX86PDPT pPdptDst;
2232 rc = PGMShwGetLongModePDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2233 if (rc != VINF_SUCCESS)
2234 {
2235 AssertRC(rc);
2236 return rc;
2237 }
2238 Assert(pPDDst);
2239# endif
2240
2241 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2242 SHWPDE PdeDst = *pPdeDst;
2243
2244# if PGM_GST_TYPE == PGM_TYPE_AMD64
2245 /* Fetch the pgm pool shadow descriptor. */
2246 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpte].u & X86_PDPE_PG_MASK);
2247 Assert(pShwPde);
2248# endif
2249
2250# ifndef PGM_WITHOUT_MAPPINGS
2251 /*
2252 * Check for conflicts.
2253 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2254 * HC: Simply resolve the conflict.
2255 */
2256 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2257 {
2258 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2259# ifndef IN_RING3
2260 Log(("SyncPT: Conflict at %VGv\n", GCPtrPage));
2261 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2262 return VERR_ADDRESS_CONFLICT;
2263# else
2264 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2265 Assert(pMapping);
2266# if PGM_GST_TYPE == PGM_TYPE_32BIT
2267 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2268# elif PGM_GST_TYPE == PGM_TYPE_PAE
2269 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2270# else
2271 AssertFailed(); /* can't happen for amd64 */
2272# endif
2273 if (VBOX_FAILURE(rc))
2274 {
2275 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2276 return rc;
2277 }
2278 PdeDst = *pPdeDst;
2279# endif
2280 }
2281# else /* PGM_WITHOUT_MAPPINGS */
2282 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2283# endif /* PGM_WITHOUT_MAPPINGS */
2284 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2285
2286 /*
2287 * Sync page directory entry.
2288 */
2289 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2290 if (PdeSrc.n.u1Present)
2291 {
2292 /*
2293 * Allocate & map the page table.
2294 */
2295 PSHWPT pPTDst;
2296# if PGM_GST_TYPE == PGM_TYPE_AMD64
2297 const bool fPageTable = !PdeSrc.b.u1Size;
2298# else
2299 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2300# endif
2301 PPGMPOOLPAGE pShwPage;
2302 RTGCPHYS GCPhys;
2303 if (fPageTable)
2304 {
2305 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2306# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2307 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2308 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2309# endif
2310# if PGM_GST_TYPE == PGM_TYPE_AMD64
2311 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2312# else
2313 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2314# endif
2315 }
2316 else
2317 {
2318 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
2319# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2320 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2321 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2322# endif
2323# if PGM_GST_TYPE == PGM_TYPE_AMD64
2324 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2325# else
2326 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2327# endif
2328 }
2329 if (rc == VINF_SUCCESS)
2330 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2331 else if (rc == VINF_PGM_CACHED_PAGE)
2332 {
2333 /*
2334 * The PT was cached, just hook it up.
2335 */
2336 if (fPageTable)
2337 PdeDst.u = pShwPage->Core.Key
2338 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2339 else
2340 {
2341 PdeDst.u = pShwPage->Core.Key
2342 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2343 /* (see explanation and assumptions further down.) */
2344 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2345 {
2346 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2347 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2348 PdeDst.b.u1Write = 0;
2349 }
2350 }
2351 *pPdeDst = PdeDst;
2352 return VINF_SUCCESS;
2353 }
2354 else if (rc == VERR_PGM_POOL_FLUSHED)
2355 {
2356 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2357 return VINF_PGM_SYNC_CR3;
2358 }
2359 else
2360 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2361 PdeDst.u &= X86_PDE_AVL_MASK;
2362 PdeDst.u |= pShwPage->Core.Key;
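    /*
     * Whichever path was taken, the shadow PDE now combines the pool page key (the
     * host physical address of the shadow page table) with the guest PDE attributes.
     * A standalone sketch of that flag merge (simplified: the real code also strips
     * the AVL bits; only architectural bit positions are used here):
     *
     *   #include <stdint.h>
     *   #define PDE_ADDR_MASK UINT64_C(0x000ffffffffff000)  // PAE/long-mode address bits
     *   #define PDE_PWT_PCD   UINT64_C(0x018)
     *   #define PDE_D         UINT64_C(0x040)               // 4M PDE dirty
     *   #define PDE_PS        UINT64_C(0x080)               // page size
     *   #define PDE_G         UINT64_C(0x100)               // 4M PDE global
     *
     *   static uint64_t HookShadowPageTable(uint64_t HCPhysShwPT, uint64_t GstPde)
     *   {
     *       // Keep the guest's P/RW/US/A style attributes, drop everything that only
     *       // makes sense for the guest's own mapping.
     *       return (HCPhysShwPT & PDE_ADDR_MASK)
     *            | (GstPde & ~(PDE_ADDR_MASK | PDE_PWT_PCD | PDE_D | PDE_PS | PDE_G));
     *   }
     */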
2363
2364 /*
2365 * Page directory has been accessed (this is a fault situation, remember).
2366 */
2367 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2368 if (fPageTable)
2369 {
2370 /*
2371 * Page table - 4KB.
2372 *
2373 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2374 */
2375 Log2(("SyncPT: 4K %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2376 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2377 PGSTPT pPTSrc;
2378 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2379 if (VBOX_SUCCESS(rc))
2380 {
2381 /*
2382 * Start by syncing the page directory entry so CSAM's TLB trick works.
2383 */
2384 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2385 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2386 *pPdeDst = PdeDst;
2387
2388 /*
2389 * Directory/page user or supervisor privilege: (same goes for read/write)
2390 *
2391 * Directory Page Combined
2392 * U/S U/S U/S
2393 * 0 0 0
2394 * 0 1 0
2395 * 1 0 0
2396 * 1 1 1
2397 *
2398 * Simple AND operation. Table listed for completeness.
2399 *
2400 */
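                /*
                 * The table boils down to a bitwise AND of the two levels; a minimal
                 * standalone illustration (architectural bit positions only, and ignoring
                 * CR0.WP for supervisor writes):
                 *
                 *   #include <stdint.h>
                 *   #define PTE_RW UINT64_C(0x02)
                 *   #define PTE_US UINT64_C(0x04)
                 *
                 *   // Effective write/user access of a 4KB mapping is the AND of the
                 *   // directory and table entries.
                 *   static uint64_t EffectiveAccess(uint64_t Pde, uint64_t Pte)
                 *   {
                 *       return (Pde & Pte) & (PTE_RW | PTE_US);
                 *   }
                 */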
2401 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4k));
2402# ifdef PGM_SYNC_N_PAGES
2403 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2404 unsigned iPTDst = iPTBase;
2405 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, ELEMENTS(pPTDst->a));
2406 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2407 iPTDst = 0;
2408 else
2409 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2410# else /* !PGM_SYNC_N_PAGES */
2411 unsigned iPTDst = 0;
2412 const unsigned iPTDstEnd = ELEMENTS(pPTDst->a);
2413# endif /* !PGM_SYNC_N_PAGES */
2414# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2415 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2416 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2417# else
2418 const unsigned offPTSrc = 0;
2419# endif
2420 for (; iPTDst < iPTDstEnd; iPTDst++)
2421 {
2422 const unsigned iPTSrc = iPTDst + offPTSrc;
2423 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2424
2425 if (PteSrc.n.u1Present) /* we've already cleared it above */
2426 {
2427# ifndef IN_RING0
2428 /*
2429 * Assuming kernel code will be marked as supervisor (not user level), executed
2430 * using a conforming code selector, and marked as read-only.
2431 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2432 */
2433 PPGMPAGE pPage;
2434 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2435 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2436 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2437 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2438 )
2439# endif
2440 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2441 Log2(("SyncPT: 4K+ %VGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%VGp\n",
2442 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2443 PteSrc.n.u1Present,
2444 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2445 PteSrc.n.u1User & PdeSrc.n.u1User,
2446 (uint64_t)PteSrc.u,
2447 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2448 (PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)));
2449 }
2450 } /* for PTEs */
2451 }
2452 }
2453 else
2454 {
2455 /*
2456 * Big page - 2/4MB.
2457 *
2458 * We'll walk the ram range list in parallel and optimize lookups.
2459 * We will only sync one shadow page table at a time.
2460 */
2461 STAM_COUNTER_INC(CTXSUFF(&pVM->pgm.s.StatSynPT4M));
2462
2463 /** @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs). */
2466
2467 /*
2468 * Start by syncing the page directory entry.
2469 */
2470 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2471 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2472
2473 /*
2474 * If the page is not flagged as dirty and is writable, then make it read-only
2475 * at PD level, so we can set the dirty bit when the page is modified.
2476 *
2477 * ASSUMES that page access handlers are implemented on page table entry level.
2478 * Thus we will first catch the dirty access and set PDE.D and restart. If
2479 * there is an access handler, we'll trap again and let it work on the problem.
2480 */
2481 /** @todo move the above stuff to a section in the PGM documentation. */
2482 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2483 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2484 {
2485 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,DirtyPageBig));
2486 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2487 PdeDst.b.u1Write = 0;
2488 }
2489 *pPdeDst = PdeDst;
2490
2491 /*
2492 * Fill the shadow page table.
2493 */
2494 /* Get address and flags from the source PDE. */
2495 SHWPTE PteDstBase;
2496 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2497
2498 /* Loop thru the entries in the shadow PT. */
2499 const RTGCUINTPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2500 Log2(("SyncPT: BIG %VGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%VGv GCPhys=%VGp %s\n",
2501 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2502 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2503 PPGMRAMRANGE pRam = CTXALLSUFF(pVM->pgm.s.pRamRanges);
2504 unsigned iPTDst = 0;
2505 while (iPTDst < ELEMENTS(pPTDst->a))
2506 {
2507 /* Advance ram range list. */
2508 while (pRam && GCPhys > pRam->GCPhysLast)
2509 pRam = CTXALLSUFF(pRam->pNext);
2510 if (pRam && GCPhys >= pRam->GCPhys)
2511 {
2512 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2513 do
2514 {
2515 /* Make shadow PTE. */
2516 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2517 SHWPTE PteDst;
2518
2519 /* Make sure the RAM has already been allocated. */
2520 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2521 {
2522 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2523 {
2524# ifdef IN_RING3
2525 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2526# else
2527 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2528# endif
2529 if (rc != VINF_SUCCESS)
2530 return rc;
2531 }
2532 }
2533
2534 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2535 {
2536 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2537 {
2538 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2539 PteDst.n.u1Write = 0;
2540 }
2541 else
2542 PteDst.u = 0;
2543 }
2544# ifndef IN_RING0
2545 /*
2546 * Assuming kernel code will be marked as supervisor and not as user level and executed
2547 * using a conforming code selector. Don't check for readonly, as that implies the whole
2548 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2549 */
2550 else if ( !PdeSrc.n.u1User
2551 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2552 PteDst.u = 0;
2553# endif
2554 else
2555 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2556# ifdef PGMPOOL_WITH_USER_TRACKING
2557 if (PteDst.n.u1Present)
2558 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2559# endif
2560 /* commit it */
2561 pPTDst->a[iPTDst] = PteDst;
2562 Log4(("SyncPT: BIG %VGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2563 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2564 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2565
2566 /* advance */
2567 GCPhys += PAGE_SIZE;
2568 iHCPage++;
2569 iPTDst++;
2570 } while ( iPTDst < ELEMENTS(pPTDst->a)
2571 && GCPhys <= pRam->GCPhysLast);
2572 }
2573 else if (pRam)
2574 {
2575 Log(("Invalid pages at %VGp\n", GCPhys));
2576 do
2577 {
2578 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2579 GCPhys += PAGE_SIZE;
2580 iPTDst++;
2581 } while ( iPTDst < ELEMENTS(pPTDst->a)
2582 && GCPhys < pRam->GCPhys);
2583 }
2584 else
2585 {
2586 Log(("Invalid pages at %VGp (2)\n", GCPhys));
2587 for ( ; iPTDst < ELEMENTS(pPTDst->a); iPTDst++)
2588 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2589 }
2590 } /* while more PTEs */
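            /*
             * A simplified standalone sketch of the range walk used above; the structure
             * is a stand-in for PGMRAMRANGE with only the fields the walk needs, not the
             * real type:
             *
             *   #include <stdint.h>
             *   #include <stddef.h>
             *   typedef struct SIMPLERANGE
             *   {
             *       uint64_t            GCPhys;      // first byte covered
             *       uint64_t            GCPhysLast;  // last byte covered (inclusive)
             *       struct SIMPLERANGE *pNext;       // sorted in ascending order
             *   } SIMPLERANGE;
             *
             *   // Advance to the range containing GCPhys; if GCPhys falls into a hole the
             *   // caller emits not-present PTEs until the start of the returned range.
             *   static SIMPLERANGE *AdvanceToRange(SIMPLERANGE *pCur, uint64_t GCPhys)
             *   {
             *       while (pCur && GCPhys > pCur->GCPhysLast)
             *           pCur = pCur->pNext;
             *       return pCur;
             *   }
             */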
2591 } /* 4KB / 4MB */
2592 }
2593 else
2594 AssertRelease(!PdeDst.n.u1Present);
2595
2596 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2597# ifdef IN_GC
2598 if (VBOX_FAILURE(rc))
2599 STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncPTFailed));
2600# endif
2601 return rc;
2602
2603#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2604 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2605
2606 int rc = VINF_SUCCESS;
2607
2608 /*
2609 * Validate input a little bit.
2610 */
2611# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2612 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2613 PX86PD pPDDst = pVM->pgm.s.CTXMID(p,32BitPD);
2614# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2615 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT; /* no mask; flat index into the 2048 entry array. */
2616 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
2617# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2618 const unsigned iPdpte = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2619 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2620 PX86PDPAE pPDDst;
2621 PX86PDPT pPdptDst;
2622 rc = PGMShwGetLongModePDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2623 if (rc != VINF_SUCCESS)
2624 {
2625 AssertRC(rc);
2626 return rc;
2627 }
2628 Assert(pPDDst);
2629
2630 /* Fetch the pgm pool shadow descriptor. */
2631 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpte].u & X86_PDPE_PG_MASK);
2632 Assert(pShwPde);
2633# endif
2634 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2635 SHWPDE PdeDst = *pPdeDst;
2636
2637 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2638 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2639
2640 GSTPDE PdeSrc;
2641 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2642 PdeSrc.n.u1Present = 1;
2643 PdeSrc.n.u1Write = 1;
2644 PdeSrc.n.u1Accessed = 1;
2645 PdeSrc.n.u1User = 1;
2646
2647 /*
2648 * Allocate & map the page table.
2649 */
2650 PSHWPT pPTDst;
2651 PPGMPOOLPAGE pShwPage;
2652 RTGCPHYS GCPhys;
2653
2654 /* Virtual address = physical address */
2655 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2656# if PGM_SHW_TYPE == PGM_TYPE_AMD64
2657 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2658# else
2659 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2660# endif
2661
2662 if ( rc == VINF_SUCCESS
2663 || rc == VINF_PGM_CACHED_PAGE)
2664 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2665 else
2666 AssertMsgFailedReturn(("rc=%Vrc\n", rc), VERR_INTERNAL_ERROR);
2667
2668 PdeDst.u &= X86_PDE_AVL_MASK;
2669 PdeDst.u |= pShwPage->Core.Key;
2670 PdeDst.n.u1Present = 1;
2671 PdeDst.n.u1Write = 1;
2672 PdeDst.n.u1User = 1;
2673 PdeDst.n.u1Accessed = 1;
2674 *pPdeDst = PdeDst;
2675
2676 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, (RTGCUINTPTR)GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2677 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2678 return rc;
2679
2680#else
2681 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2682 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncPT), a);
2683 return VERR_INTERNAL_ERROR;
2684#endif
2685}
2686
2687
2688
2689/**
2690 * Prefetch a page/set of pages.
2691 *
2692 * Typically used to sync commonly used pages before entering raw mode
2693 * after a CR3 reload.
2694 *
2695 * @returns VBox status code.
2696 * @param pVM VM handle.
2697 * @param GCPtrPage Page to prefetch.
2698 */
2699PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCUINTPTR GCPtrPage)
2700{
2701#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2702 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2703 /*
2704 * Check that all Guest levels thru the PDE are present, getting the
2705 * PD and PDE in the processes.
2706 */
2707 int rc = VINF_SUCCESS;
2708# if PGM_WITH_PAGING(PGM_GST_TYPE)
2709# if PGM_GST_TYPE == PGM_TYPE_32BIT
2710 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2711 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2712# elif PGM_GST_TYPE == PGM_TYPE_PAE
2713 unsigned iPDSrc;
2714 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2715 if (!pPDSrc)
2716 return VINF_SUCCESS; /* not present */
2717# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2718 unsigned iPDSrc;
2719 PX86PML4E pPml4eSrc;
2720 X86PDPE PdpeSrc;
2721 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2722 if (!pPDSrc)
2723 return VINF_SUCCESS; /* not present */
2724# endif
2725 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2726# else
2727 PGSTPD pPDSrc = NULL;
2728 const unsigned iPDSrc = 0;
2729 GSTPDE PdeSrc;
2730
2731 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2732 PdeSrc.n.u1Present = 1;
2733 PdeSrc.n.u1Write = 1;
2734 PdeSrc.n.u1Accessed = 1;
2735 PdeSrc.n.u1User = 1;
2736# endif
2737
2738 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2739 {
2740# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2741 const X86PDE PdeDst = pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2742# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2743 const X86PDEPAE PdeDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2744# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2745 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2746 PX86PDPAE pPDDst;
2747 X86PDEPAE PdeDst;
2748
2749# if PGM_GST_TYPE == PGM_TYPE_PROT
2750 /* AMD-V nested paging */
2751 X86PML4E Pml4eSrc;
2752 X86PDPE PdpeSrc;
2753 PX86PML4E pPml4eSrc = &Pml4eSrc;
2754
2755 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
2756 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
2757 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
2758# endif
2759
2760 int rc = PGMShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
2761 if (rc != VINF_SUCCESS)
2762 {
2763 AssertRC(rc);
2764 return rc;
2765 }
2766 Assert(pPDDst);
2767 PdeDst = pPDDst->a[iPDDst];
2768# endif
2769 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2770 {
2771 if (!PdeDst.n.u1Present)
2772 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2773 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2774 else
2775 {
2776 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2777 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2778 * makes no sense to prefetch more than one page.
2779 */
2780 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2781 if (VBOX_SUCCESS(rc))
2782 rc = VINF_SUCCESS;
2783 }
2784 }
2785 }
2786 return rc;
2787#elif PGM_SHW_TYPE == PGM_TYPE_NESTED
2788 return VINF_SUCCESS; /* ignore */
2789#endif
2790}
2791
2792
2793
2794
2795/**
2796 * Syncs a page during a PGMVerifyAccess() call.
2797 *
2798 * @returns VBox status code (informational included).
     * @param pVM VM handle.
2799 * @param GCPtrPage The address of the page to sync.
2800 * @param fPage The effective guest page flags.
2801 * @param uErr The trap error code.
2802 */
2803PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCUINTPTR GCPtrPage, unsigned fPage, unsigned uErr)
2804{
2805 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%VGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
2806
2807 Assert(!HWACCMIsNestedPagingActive(pVM));
2808#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2809 && PGM_SHW_TYPE != PGM_TYPE_NESTED
2810
2811# ifndef IN_RING0
2812 if (!(fPage & X86_PTE_US))
2813 {
2814 /*
2815 * Mark this page as safe.
2816 */
2817 /** @todo not correct for pages that contain both code and data!! */
2818 Log(("CSAMMarkPage %VGv; scanned=%d\n", GCPtrPage, true));
2819 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
2820 }
2821# endif
2822 /*
2823 * Get guest PD and index.
2824 */
2825
2826# if PGM_WITH_PAGING(PGM_GST_TYPE)
2827# if PGM_GST_TYPE == PGM_TYPE_32BIT
2828 const unsigned iPDSrc = (RTGCUINTPTR)GCPtrPage >> GST_PD_SHIFT;
2829 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
2830# elif PGM_GST_TYPE == PGM_TYPE_PAE
2831 unsigned iPDSrc;
2832 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc);
2833
2834 if (!pPDSrc)
2835 {
2836 Log(("PGMVerifyAccess: access violation for %VGv due to non-present PDPTR\n", GCPtrPage));
2837 return VINF_EM_RAW_GUEST_TRAP;
2838 }
2839# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2840 unsigned iPDSrc;
2841 PX86PML4E pPml4eSrc;
2842 X86PDPE PdpeSrc;
2843 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2844 if (!pPDSrc)
2845 {
2846 Log(("PGMVerifyAccess: access violation for %VGv due to non-present PDPTR\n", GCPtrPage));
2847 return VINF_EM_RAW_GUEST_TRAP;
2848 }
2849# endif
2850# else
2851 PGSTPD pPDSrc = NULL;
2852 const unsigned iPDSrc = 0;
2853# endif
2854 int rc = VINF_SUCCESS;
2855
2856 /*
2857 * First check if the shadow pd is present.
2858 */
2859# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2860 PX86PDE pPdeDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[GCPtrPage >> SHW_PD_SHIFT];
2861# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2862 PX86PDEPAE pPdeDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[GCPtrPage >> SHW_PD_SHIFT];
2863# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2864 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2865 PX86PDPAE pPDDst;
2866 PX86PDEPAE pPdeDst;
2867
2868# if PGM_GST_TYPE == PGM_TYPE_PROT
2869 /* AMD-V nested paging */
2870 X86PML4E Pml4eSrc;
2871 X86PDPE PdpeSrc;
2872 PX86PML4E pPml4eSrc = &Pml4eSrc;
2873
2874 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
2875 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
2876 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
2877# endif
2878
2879 rc = PGMShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
2880 if (rc != VINF_SUCCESS)
2881 {
2882 AssertRC(rc);
2883 return rc;
2884 }
2885 Assert(pPDDst);
2886 pPdeDst = &pPDDst->a[iPDDst];
2887# endif
2888 if (!pPdeDst->n.u1Present)
2889 {
2890 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2891 AssertRC(rc);
2892 if (rc != VINF_SUCCESS)
2893 return rc;
2894 }
2895
2896# if PGM_WITH_PAGING(PGM_GST_TYPE)
2897 /* Check for dirty bit fault */
2898 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
2899 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
2900 Log(("PGMVerifyAccess: success (dirty)\n"));
2901 else
2902 {
2903 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2904#else
2905 {
2906 GSTPDE PdeSrc;
2907 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2908 PdeSrc.n.u1Present = 1;
2909 PdeSrc.n.u1Write = 1;
2910 PdeSrc.n.u1Accessed = 1;
2911 PdeSrc.n.u1User = 1;
2912
2913#endif /* PGM_WITH_PAGING(PGM_GST_TYPE) */
2914 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
2915 if (uErr & X86_TRAP_PF_US)
2916 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncUser);
2917 else /* supervisor */
2918 STAM_COUNTER_INC(&pVM->pgm.s.StatGCPageOutOfSyncSupervisor);
2919
2920 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
2921 if (VBOX_SUCCESS(rc))
2922 {
2923 /* Page was successfully synced */
2924 Log2(("PGMVerifyAccess: success (sync)\n"));
2925 rc = VINF_SUCCESS;
2926 }
2927 else
2928 {
2929 Log(("PGMVerifyAccess: access violation for %VGv rc=%d\n", GCPtrPage, rc));
2930 return VINF_EM_RAW_GUEST_TRAP;
2931 }
2932 }
2933 return rc;
2934
2935#else /* unsupported guest/shadow mode combination */
2936
2937 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2938 return VERR_INTERNAL_ERROR;
2939#endif /* unsupported guest/shadow mode combination */
2940}
2941
2942
2943#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
2944# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
2945/**
2946 * Figures out which kind of shadow page this guest PDE warrants.
2947 *
2948 * @returns Shadow page kind.
2949 * @param pPdeSrc The guest PDE in question.
2950 * @param cr4 The current guest cr4 value.
2951 */
2952DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
2953{
2954# if PGM_GST_TYPE == PGM_TYPE_AMD64
2955 if (!pPdeSrc->n.u1Size)
2956# else
2957 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
2958# endif
2959 return BTH_PGMPOOLKIND_PT_FOR_PT;
2960 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
2961 //{
2962 // case 0:
2963 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
2964 // case X86_PDE4M_RW:
2965 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
2966 // case X86_PDE4M_US:
2967 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
2968 // case X86_PDE4M_RW | X86_PDE4M_US:
2969 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
2970# if 0
2971 // case X86_PDE4M_PAE_NX:
2972 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
2973 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
2974 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
2975 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
2976 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
2977 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
2978 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
2979# endif
2980 return BTH_PGMPOOLKIND_PT_FOR_BIG;
2981 //}
2982}
2983# endif
2984#endif
2985
2986#undef MY_STAM_COUNTER_INC
2987#define MY_STAM_COUNTER_INC(a) do { } while (0)
2988
2989
2990/**
2991 * Syncs the paging hierarchy starting at CR3.
2992 *
2993 * @returns VBox status code, no specials.
2994 * @param pVM The virtual machine.
2995 * @param cr0 Guest context CR0 register
2996 * @param cr3 Guest context CR3 register
2997 * @param cr4 Guest context CR4 register
2998 * @param fGlobal Including global page directories or not
2999 */
3000PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3001{
3002 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3003 fGlobal = true; /* Change this CR3 reload to be a global one. */
3004
3005#if PGM_SHW_TYPE != PGM_TYPE_NESTED
3006 /*
3007 * Update page access handlers.
3008 * Virtual handlers are always flushed, while physical handlers are only flushed on demand.
3009 * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
3010 * have to look into that later because it has a bad influence on performance.
3011 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3012 * bird: Yes, but that won't work for aliases.
3013 */
3014 /** @todo this MUST go away. See #1557. */
3015 STAM_PROFILE_START(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
3016 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3017 STAM_PROFILE_STOP(&pVM->pgm.s.CTXMID(Stat,SyncCR3Handlers), h);
3018#endif
3019
3020#ifdef PGMPOOL_WITH_MONITORING
3021 int rc = pgmPoolSyncCR3(pVM);
3022 if (rc != VINF_SUCCESS)
3023 return rc;
3024#endif
3025
3026#if PGM_SHW_TYPE == PGM_TYPE_NESTED
3027 /** @todo check if this is really necessary */
3028 HWACCMFlushTLB(pVM);
3029 return VINF_SUCCESS;
3030
3031#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3032 /* No need to check all paging levels; we zero out the shadow parts when the guest modifies its tables. */
3033 return VINF_SUCCESS;
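    /*
     * This early return relies on the pool's write monitoring of guest paging
     * structures: when the guest edits one of its tables, the corresponding shadow
     * entry is cleared right away, so there is nothing left for SyncCR3 to walk.
     * A rough sketch of that invariant (hypothetical helper, not the pgmPool API):
     *
     *   #include <stdint.h>
     *   // Called from the write-access handler monitoring a guest page table page.
     *   static void OnGuestPagingStructWrite(uint64_t *pShwEntry)
     *   {
     *       *pShwEntry = 0;   // the next access refaults and resyncs the entry lazily
     *   }
     */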
3034#else
3035
3036 Assert(fGlobal || (cr4 & X86_CR4_PGE));
3037 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTXMID(Stat,SyncCR3Global) : &pVM->pgm.s.CTXMID(Stat,SyncCR3NotGlobal));
3038
3039# if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3040# if PGM_GST_TYPE == PGM_TYPE_AMD64
3041 bool fBigPagesSupported = true;
3042# else
3043 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3044# endif
3045
3046 /*
3047 * Get page directory addresses.
3048 */
3049# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3050 PX86PDE pPDEDst = &pVM->pgm.s.CTXMID(p,32BitPD)->a[0];
3051# else /* PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64*/
3052# if PGM_GST_TYPE == PGM_TYPE_32BIT
3053 PX86PDEPAE pPDEDst = &pVM->pgm.s.CTXMID(ap,PaePDs)[0]->a[0];
3054# endif
3055# endif
3056
3057# if PGM_GST_TYPE == PGM_TYPE_32BIT
3058 PGSTPD pPDSrc = CTXSUFF(pVM->pgm.s.pGuestPD);
3059 Assert(pPDSrc);
3060# ifndef IN_GC
3061 Assert(MMPhysGCPhys2HCVirt(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == pPDSrc);
3062# endif
3063# endif
3064
3065 /*
3066 * Iterate the page directory.
3067 */
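 /* For each guest PDE we either keep the existing shadow page table (nothing significant
 * changed), free it so it is re-synced lazily on the next page fault, or deal with
 * conflicts against hypervisor (GC) mappings living in the same range. */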
3068 PPGMMAPPING pMapping;
3069 unsigned iPdNoMapping;
3070 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
3071 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3072
3073 /* Only check mappings if they are supposed to be put into the shadow page table. */
3074 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
3075 {
3076 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
3077 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
3078 }
3079 else
3080 {
3081 pMapping = 0;
3082 iPdNoMapping = ~0U;
3083 }
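 /* iPdNoMapping is the guest PD index where the next hypervisor mapping starts, or ~0U if there is none. */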
3084# if PGM_GST_TYPE == PGM_TYPE_AMD64
3085 for (uint64_t iPml4e = 0; iPml4e < X86_PG_PAE_ENTRIES; iPml4e++)
3086 {
3087 PPGMPOOLPAGE pShwPdpt = NULL;
3088 PX86PML4E pPml4eSrc, pPml4eDst;
3089 RTGCPHYS GCPhysPdptSrc;
3090
3091 pPml4eSrc = &pVM->pgm.s.CTXSUFF(pGstPaePML4)->a[iPml4e];
3092 pPml4eDst = &pVM->pgm.s.CTXMID(p,PaePML4)->a[iPml4e];
3093
3094 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3095 if (!pPml4eDst->n.u1Present)
3096 continue;
3097 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3098
3099 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3100
3101 /* Anything significant changed? */
3102 if ( pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present
3103 || GCPhysPdptSrc != pShwPdpt->GCPhys)
3104 {
3105 /* Free it. */
3106 LogFlow(("SyncCR3: Out-of-sync PML4E (GCPhys) GCPtr=%VGv %VGp vs %VGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
3107 (uint64_t)iPml4e << X86_PML4_SHIFT, pShwPdpt->GCPhys, GCPhysPdptSrc, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
3108 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.pHCShwAmd64CR3->idx, iPml4e);
3109 pPml4eDst->u = 0;
3110 continue;
3111 }
3112 /* Force an attribute sync. */
3113 pPml4eDst->n.u1User = pPml4eSrc->n.u1User;
3114 pPml4eDst->n.u1Write = pPml4eSrc->n.u1Write;
3115 pPml4eDst->n.u1NoExecute = pPml4eSrc->n.u1NoExecute;
3116
3117# else
3118 {
3119# endif
3120# if PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3121 for (uint64_t iPdpte = 0; iPdpte < GST_PDPE_ENTRIES; iPdpte++)
3122 {
3123 unsigned iPDSrc;
3124# if PGM_GST_TYPE == PGM_TYPE_PAE
3125 PX86PDPAE pPDPAE = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
3126 PX86PDEPAE pPDEDst = &pPDPAE->a[iPdpte * X86_PG_PAE_ENTRIES];
3127 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPdpte << X86_PDPT_SHIFT, &iPDSrc);
3128 PX86PDPT pPdptDst = pVM->pgm.s.CTXMID(p,PaePDPT);
3129 X86PDPE PdpeSrc = CTXSUFF(pVM->pgm.s.pGstPaePDPT)->a[iPdpte];
3130
3131 if (pPDSrc == NULL)
3132 {
3133 /* PDPE not present */
3134 if (pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPdpte].n.u1Present)
3135 {
3136 LogFlow(("SyncCR3: guest PDPE %d not present; clear shw pdpe\n", iPdpte));
3137 /* for each page directory entry */
3138 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
3139 {
3140 if ( pPDEDst[iPD].n.u1Present
3141 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
3142 {
3143 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst[iPD].u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdpte * X86_PG_PAE_ENTRIES + iPD);
3144 pPDEDst[iPD].u = 0;
3145 }
3146 }
3147 }
3148 if (!(pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPdpte].u & PGM_PLXFLAGS_MAPPING))
3149 pVM->pgm.s.CTXMID(p,PaePDPT)->a[iPdpte].n.u1Present = 0;
3150 continue;
3151 }
3152# else /* PGM_GST_TYPE != PGM_TYPE_PAE */
3153 PPGMPOOLPAGE pShwPde = NULL;
3154 RTGCPHYS GCPhysPdeSrc;
3155 PX86PDPE pPdpeDst;
3156 PX86PML4E pPml4eSrc;
3157 X86PDPE PdpeSrc;
3158 PX86PDPT pPdptDst;
3159 PX86PDPAE pPDDst;
3160 PX86PDEPAE pPDEDst;
3161 RTGCUINTPTR GCPtr = (iPml4e << X86_PML4_SHIFT) | (iPdpte << X86_PDPT_SHIFT);
3162 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3163
3164 int rc = PGMShwGetLongModePDPtr(pVM, GCPtr, &pPdptDst, &pPDDst);
3165 if (rc != VINF_SUCCESS)
3166 {
3167 if (rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT)
3168 break; /* next PML4E */
3169
3170 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Vrc\n", rc));
3171 continue; /* next PDPTE */
3172 }
3173 Assert(pPDDst);
3174 pPDEDst = &pPDDst->a[0];
3175 Assert(iPDSrc == 0);
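 /* GCPtr is built from the PML4 and PDPT indices only, so the returned PD index must be 0. */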
3176
3177 pPdpeDst = &pPdptDst->a[iPdpte];
3178
3179 /* Fetch the pgm pool shadow descriptor if the shadow pdpte is present. */
3180 if (!pPdpeDst->n.u1Present)
3181 continue; /* next PDPTE */
3182
3183 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3184 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3185
3186 /* Anything significant changed? */
3187 if ( PdpeSrc.n.u1Present != pPdpeDst->n.u1Present
3188 || GCPhysPdeSrc != pShwPde->GCPhys)
3189 {
3190 /* Free it. */
3191 LogFlow(("SyncCR3: Out-of-sync PDPE (GCPhys) GCPtr=%VGv %VGp vs %VGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
3192 ((uint64_t)iPml4e << X86_PML4_SHIFT) + ((uint64_t)iPdpte << X86_PDPT_SHIFT), pShwPde->GCPhys, GCPhysPdeSrc, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
3193
3194 /* Mark it as not present if there's no hypervisor mapping present. (bit flipped at the top of Trap0eHandler) */
3195 Assert(!(pPdpeDst->u & PGM_PLXFLAGS_MAPPING));
3196 pgmPoolFreeByPage(pPool, pShwPde, pShwPde->idx, iPdpte);
3197 pPdpeDst->u = 0;
3198 continue; /* next guest PDPTE */
3199 }
3200 /* Force an attribute sync. */
3201 pPdpeDst->lm.u1User = PdpeSrc.lm.u1User;
3202 pPdpeDst->lm.u1Write = PdpeSrc.lm.u1Write;
3203 pPdpeDst->lm.u1NoExecute = PdpeSrc.lm.u1NoExecute;
3204# endif /* PGM_GST_TYPE != PGM_TYPE_PAE */
3205
3206# else /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
3207 {
3208# endif /* PGM_GST_TYPE != PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_AMD64 */
3209 for (unsigned iPD = 0; iPD < ELEMENTS(pPDSrc->a); iPD++)
3210 {
3211# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3212 Assert(&pVM->pgm.s.CTXMID(p,32BitPD)->a[iPD] == pPDEDst);
3213# elif PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3214 AssertMsg(&pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512] == pPDEDst, ("%p vs %p\n", &pVM->pgm.s.CTXMID(ap,PaePDs)[iPD * 2 / 512]->a[iPD * 2 % 512], pPDEDst));
3215# endif
3216 register GSTPDE PdeSrc = pPDSrc->a[iPD];
3217 if ( PdeSrc.n.u1Present
3218 && (PdeSrc.n.u1User || fRawR0Enabled))
3219 {
3220# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3221 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3222 && !defined(PGM_WITHOUT_MAPPINGS)
3223
3224 /*
3225 * Check for conflicts with GC mappings.
3226 */
3227# if PGM_GST_TYPE == PGM_TYPE_PAE
3228 if (iPD + iPdpte * X86_PG_PAE_ENTRIES == iPdNoMapping)
3229# else
3230 if (iPD == iPdNoMapping)
3231# endif
3232 {
3233 if (pVM->pgm.s.fMappingsFixed)
3234 {
3235 /* It's fixed, just skip the mapping. */
3236 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3237 iPD += cPTs - 1;
3238 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
3239 pMapping = pMapping->CTXALLSUFF(pNext);
3240 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3241 continue;
3242 }
3243# ifdef IN_RING3
3244# if PGM_GST_TYPE == PGM_TYPE_32BIT
3245 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3246# elif PGM_GST_TYPE == PGM_TYPE_PAE
3247 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpte << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3248# endif
3249 if (VBOX_FAILURE(rc))
3250 return rc;
3251
3252 /*
3253 * Update iPdNoMapping and pMapping.
3254 */
3255 pMapping = pVM->pgm.s.pMappingsR3;
3256 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3257 pMapping = pMapping->pNextR3;
3258 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3259# else
3260 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3261 return VINF_PGM_SYNC_CR3;
3262# endif
3263 }
3264# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3265 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3266# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3267 /*
3268 * Sync page directory entry.
3269 *
3270 * The current approach is to allocate the page table but to set
3271 * the entry to not-present and postpone the page table syncing until
3272 * it's actually used.
3273 */
3274# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3275 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3276# elif PGM_GST_TYPE == PGM_TYPE_PAE
3277 const unsigned iPdShw = iPD + iPdpte * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3278# else
3279 const unsigned iPdShw = iPD; NOREF(iPdShw);
3280# endif
3281 {
3282 SHWPDE PdeDst = *pPDEDst;
3283 if (PdeDst.n.u1Present)
3284 {
3285 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
3286 RTGCPHYS GCPhys;
3287 if ( !PdeSrc.b.u1Size
3288 || !fBigPagesSupported)
3289 {
3290 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
3291# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3292 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3293 GCPhys |= i * (PAGE_SIZE / 2);
3294# endif
3295 }
3296 else
3297 {
3298 GCPhys = PdeSrc.u & GST_PDE_BIG_PG_MASK;
3299# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3300 /* Select the right PDE as we're emulating a 4 MB guest page with two 2 MB shadow PDEs. */
3301 GCPhys |= i * X86_PAGE_2M_SIZE;
3302# endif
3303 }
3304
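 /* Keep the shadow page table only if it still maps the same guest physical address, has the
 * expected kind, is cached (or is a global page we may skip on a non-global flush), and
 * its US/RW attributes still match the guest PDE; otherwise it is freed below. */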
3305 if ( pShwPage->GCPhys == GCPhys
3306 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
3307 && ( pShwPage->fCached
3308 || ( !fGlobal
3309 && ( false
3310# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
3311 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3312# if PGM_GST_TYPE == PGM_TYPE_AMD64
3313 && (cr4 & X86_CR4_PGE)) /* global 2/4MB page. */
3314# else
3315 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
3316# endif
3317 || ( !pShwPage->fSeenNonGlobal
3318 && (cr4 & X86_CR4_PGE))
3319# endif
3320 )
3321 )
3322 )
3323 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
3324 || ( fBigPagesSupported
3325 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
3326 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
3327 )
3328 )
3329 {
3330# ifdef VBOX_WITH_STATISTICS
3331 if ( !fGlobal
3332 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3333# if PGM_GST_TYPE == PGM_TYPE_AMD64
3334 && (cr4 & X86_CR4_PGE)) /* global 2/4MB page. */
3335# else
3336 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
3337# endif
3338 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPD));
3339 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
3340 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstSkippedGlobalPT));
3341 else
3342 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstCacheHit));
3343# endif /* VBOX_WITH_STATISTICS */
3344 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
3345 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
3346 //# ifdef PGMPOOL_WITH_CACHE
3347 // pgmPoolCacheUsed(pPool, pShwPage);
3348 //# endif
3349 }
3350 else
3351 {
3352# if PGM_GST_TYPE == PGM_TYPE_AMD64
3353 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPdShw);
3354# else
3355 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3356# endif
3357 pPDEDst->u = 0;
3358 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreed));
3359 }
3360 }
3361 else
3362 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstNotPresent));
3363 pPDEDst++;
3364 }
3365 }
3366# if PGM_GST_TYPE == PGM_TYPE_PAE
3367 else if (iPD + iPdpte * X86_PG_PAE_ENTRIES != iPdNoMapping)
3368# else
3369 else if (iPD != iPdNoMapping)
3370# endif
3371 {
3372 /*
3373 * Check if there is any page directory to mark not present here.
3374 */
3375# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3376 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3377# elif PGM_GST_TYPE == PGM_TYPE_PAE
3378 const unsigned iPdShw = iPD + iPdpte * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3379# else
3380 const unsigned iPdShw = iPD; NOREF(iPdShw);
3381# endif
3382 {
3383 if (pPDEDst->n.u1Present)
3384 {
3385# if PGM_GST_TYPE == PGM_TYPE_AMD64
3386 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), pShwPde->idx, iPdShw);
3387# else
3388 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, pPDEDst->u & SHW_PDE_PG_MASK), SHW_POOL_ROOT_IDX, iPdShw);
3389# endif
3390 pPDEDst->u = 0;
3391 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTXMID(Stat,SyncCR3DstFreedSrcNP));
3392 }
3393 pPDEDst++;
3394 }
3395 }
3396 else
3397 {
3398# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3399 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3400 && !defined(PGM_WITHOUT_MAPPINGS)
3401
3402 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3403
3404 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3405 if (pVM->pgm.s.fMappingsFixed)
3406 {
3407 /* It's fixed, just skip the mapping. */
3408 pMapping = pMapping->CTXALLSUFF(pNext);
3409 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3410 }
3411 else
3412 {
3413 /*
3414 * Check for conflicts for subsequent pagetables
3415 * and advance to the next mapping.
3416 */
3417 iPdNoMapping = ~0U;
3418 unsigned iPT = cPTs;
3419 while (iPT-- > 1)
3420 {
3421 if ( pPDSrc->a[iPD + iPT].n.u1Present
3422 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3423 {
3424# ifdef IN_RING3
3425# if PGM_GST_TYPE == PGM_TYPE_32BIT
3426 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3427# elif PGM_GST_TYPE == PGM_TYPE_PAE
3428 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpte << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3429# endif
3430 if (VBOX_FAILURE(rc))
3431 return rc;
3432
3433 /*
3434 * Update iPdNoMapping and pMapping.
3435 */
3436 pMapping = pVM->pgm.s.CTXALLSUFF(pMappings);
3437 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3438 pMapping = pMapping->CTXALLSUFF(pNext);
3439 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3440 break;
3441# else
3442 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3443 return VINF_PGM_SYNC_CR3;
3444# endif
3445 }
3446 }
3447 if (iPdNoMapping == ~0U && pMapping)
3448 {
3449 pMapping = pMapping->CTXALLSUFF(pNext);
3450 if (pMapping)
3451 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3452 }
3453 }
3454
3455 /* advance. */
3456 iPD += cPTs - 1;
3457 pPDEDst += cPTs + (PGM_GST_TYPE != PGM_SHW_TYPE) * cPTs; /* Only applies to the pae shadow and 32 bits guest case */
3458# if PGM_GST_TYPE != PGM_SHW_TYPE
3459 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3460# endif
3461# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3462 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3463# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3464 }
3465
3466 } /* for iPD */
3467 } /* for each PDPTE (PAE) */
3468 } /* for each page map level 4 entry (amd64) */
3469 return VINF_SUCCESS;
3470
3471# else /* guest real and protected mode */
3472 return VINF_SUCCESS;
3473# endif
3474#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
3475}
3476
3477
3478
3479
3480#ifdef VBOX_STRICT
3481#ifdef IN_GC
3482# undef AssertMsgFailed
3483# define AssertMsgFailed Log
3484#endif
3485#ifdef IN_RING3
3486# include <VBox/dbgf.h>
3487
3488/**
3489 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3490 *
3491 * @returns VBox status code (VINF_SUCCESS).
3492 * @param pVM The VM handle.
3493 * @param cr3 The root of the hierarchy.
3494 * @param cr4 The cr4, only PAE and PSE are currently used.
3495 * @param fLongMode Set if long mode, false if not long mode.
3496 * @param cMaxDepth Number of levels to dump.
3497 * @param pHlp Pointer to the output functions.
3498 */
3499__BEGIN_DECLS
3500PGMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3501__END_DECLS
3502
3503#endif
3504
3505/**
3506 * Checks that the shadow page table is in sync with the guest one.
3507 *
3508 * @returns The number of errors.
3509 * @param pVM The virtual machine.
3510 * @param cr3 Guest context CR3 register
3511 * @param cr4 Guest context CR4 register
3512 * @param GCPtr Where to start. Defaults to 0.
3513 * @param cb How much to check. Defaults to everything.
3514 */
3515PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCUINTPTR GCPtr, RTGCUINTPTR cb)
3516{
3517#if PGM_SHW_TYPE == PGM_TYPE_NESTED
3518 return 0;
3519#else
3520 unsigned cErrors = 0;
3521
3522#if PGM_GST_TYPE == PGM_TYPE_PAE
3523 /** @todo currently broken; crashes somewhere below. */
3524 AssertFailed();
3525#endif
3526
3527#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3528 || PGM_GST_TYPE == PGM_TYPE_PAE \
3529 || PGM_GST_TYPE == PGM_TYPE_AMD64
3530
3531# if PGM_GST_TYPE == PGM_TYPE_AMD64
3532 bool fBigPagesSupported = true;
3533# else
3534 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3535# endif
3536 PPGM pPGM = &pVM->pgm.s;
3537 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3538 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3539# ifndef IN_RING0
3540 RTHCPHYS HCPhys; /* general usage. */
3541# endif
3542 int rc;
3543
3544 /*
3545 * Check that the Guest CR3 and all its mappings are correct.
3546 */
3547 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3548 ("Invalid GCPhysCR3=%VGp cr3=%VGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3549 false);
3550# ifndef IN_RING0
3551# if PGM_GST_TYPE == PGM_TYPE_32BIT
3552 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGuestPDGC, NULL, &HCPhysShw);
3553# else
3554 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePDPTGC, NULL, &HCPhysShw);
3555# endif
3556 AssertRCReturn(rc, 1);
3557 HCPhys = NIL_RTHCPHYS;
3558 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3559 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%VHp HCPhyswShw=%VHp (cr3)\n", HCPhys, HCPhysShw), false);
3560# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3561 RTGCPHYS GCPhys;
3562 rc = PGMR3DbgHCPtr2GCPhys(pVM, pPGM->pGuestPDHC, &GCPhys);
3563 AssertRCReturn(rc, 1);
3564 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%VGp cr3=%VGp\n", GCPhys, (RTGCPHYS)cr3), false);
3565# endif
3566#endif /* !IN_RING0 */
3567
3568 /*
3569 * Get and check the Shadow CR3.
3570 */
3571# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3572 unsigned cPDEs = X86_PG_ENTRIES;
3573 unsigned ulIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3574# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3575# if PGM_GST_TYPE == PGM_TYPE_32BIT
3576 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3577# else
3578 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3579# endif
3580 unsigned ulIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3581# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3582 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3583 unsigned ulIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3584# endif
3585 if (cb != ~(RTGCUINTPTR)0)
3586 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
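 /* cPDEs is the number of shadow PDEs to check; ulIncrement is the virtual address range covered by one shadow PDE. */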
3587
3588/** @todo call the other two PGMAssert*() functions. */
3589
3590# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3591 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3592# endif
3593
3594# if PGM_GST_TYPE == PGM_TYPE_AMD64
3595 unsigned iPml4e = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3596
3597 for (; iPml4e < X86_PG_PAE_ENTRIES; iPml4e++)
3598 {
3599 PPGMPOOLPAGE pShwPdpt = NULL;
3600 PX86PML4E pPml4eSrc, pPml4eDst;
3601 RTGCPHYS GCPhysPdptSrc;
3602
3603 pPml4eSrc = &pVM->pgm.s.CTXSUFF(pGstPaePML4)->a[iPml4e];
3604 pPml4eDst = &pVM->pgm.s.CTXMID(p,PaePML4)->a[iPml4e];
3605
3606 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3607 if (!pPml4eDst->n.u1Present)
3608 {
3609 GCPtr += UINT64_C(_2M * 512 * 512);
3610 continue;
3611 }
3612
3613# if PGM_GST_TYPE == PGM_TYPE_PAE
3614 /* not correct to call pgmPoolGetPage */
3615 AssertFailed();
3616# endif
3617 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3618 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3619
3620 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3621 {
3622 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3623 GCPtr += UINT64_C(_2M * 512 * 512);
3624 cErrors++;
3625 continue;
3626 }
3627
3628 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3629 {
3630 AssertMsgFailed(("Physical address doesn't match! iPml4e %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4e, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3631 GCPtr += UINT64_C(_2M * 512 * 512);
3632 cErrors++;
3633 continue;
3634 }
3635
3636 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3637 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3638 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3639 {
3640 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3641 GCPtr += UINT64_C(_2M * 512 * 512);
3642 cErrors++;
3643 continue;
3644 }
3645# else
3646 {
3647# endif
3648
3649# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3650 /*
3651 * Check the PDPTEs too.
3652 */
3653 unsigned iPdpte = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3654
3655 for (;iPdpte <= SHW_PDPT_MASK; iPdpte++)
3656 {
3657 unsigned iPDSrc;
3658 PPGMPOOLPAGE pShwPde = NULL;
3659 PX86PDPE pPdpeDst;
3660 RTGCPHYS GCPhysPdeSrc;
3661# if PGM_GST_TYPE == PGM_TYPE_PAE
3662 PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0];
3663 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc);
3664 PX86PDPT pPdptDst = pVM->pgm.s.CTXMID(p,PaePDPT);
3665 X86PDPE PdpeSrc = CTXSUFF(pVM->pgm.s.pGstPaePDPT)->a[iPdpte];
3666# else
3667 PX86PML4E pPml4eSrc;
3668 X86PDPE PdpeSrc;
3669 PX86PDPT pPdptDst;
3670 PX86PDPAE pPDDst;
3671 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3672
3673 rc = PGMShwGetLongModePDPtr(pVM, GCPtr, &pPdptDst, &pPDDst);
3674 if (rc != VINF_SUCCESS)
3675 {
3676 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Vrc\n", rc));
3677 GCPtr += 512 * _2M;
3678 continue; /* next PDPTE */
3679 }
3680 Assert(pPDDst);
3681# endif
3682 Assert(iPDSrc == 0);
3683
3684 pPdpeDst = &pPdptDst->a[iPdpte];
3685
3686 if (!pPdpeDst->n.u1Present)
3687 {
3688 GCPtr += 512 * _2M;
3689 continue; /* next PDPTE */
3690 }
3691
3692 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3693 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3694
3695 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3696 {
3697 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3698 GCPtr += 512 * _2M;
3699 cErrors++;
3700 continue;
3701 }
3702
3703 if (GCPhysPdeSrc != pShwPde->GCPhys)
3704 {
3705# if PGM_GST_TYPE == PGM_TYPE_AMD64
3706 AssertMsgFailed(("Physical address doesn't match! iPml4e %d iPdpte %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4e, iPdpte, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3707# else
3708 AssertMsgFailed(("Physical address doesn't match! iPdpte %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpte, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3709# endif
3710 GCPtr += 512 * _2M;
3711 cErrors++;
3712 continue;
3713 }
3714
3715# if PGM_GST_TYPE == PGM_TYPE_AMD64
3716 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3717 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3718 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3719 {
3720 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3721 GCPtr += 512 * _2M;
3722 cErrors++;
3723 continue;
3724 }
3725# endif
3726
3727# else
3728 {
3729# endif
3730# if PGM_GST_TYPE == PGM_TYPE_32BIT
3731 const GSTPD *pPDSrc = CTXSUFF(pPGM->pGuestPD);
3732# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3733 const X86PD *pPDDst = pPGM->CTXMID(p,32BitPD);
3734# else
3735 const PX86PDPAE pPDDst = pVM->pgm.s.CTXMID(ap,PaePDs)[0]; /* We treat this as a PD with 2048 entries, so no need to and with SHW_PD_MASK to get iPDDst */
3736# endif
3737# endif
3738 /*
3739 * Iterate the shadow page directory.
3740 */
3741 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
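 /* Align GCPtr down to a shadow PDE boundary before walking the page directory. */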
3742 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3743
3744 for (;
3745 iPDDst < cPDEs;
3746 iPDDst++, GCPtr += ulIncrement)
3747 {
3748 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3749 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3750 {
3751 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3752 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3753 {
3754 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3755 cErrors++;
3756 continue;
3757 }
3758 }
3759 else if ( (PdeDst.u & X86_PDE_P)
3760 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3761 )
3762 {
3763 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3764 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3765 if (!pPoolPage)
3766 {
3767 AssertMsgFailed(("Invalid page table address %VGp at %VGv! PdeDst=%#RX64\n",
3768 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3769 cErrors++;
3770 continue;
3771 }
3772 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3773
3774 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3775 {
3776 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %VGv! These flags are not virtualized! PdeDst=%#RX64\n",
3777 GCPtr, (uint64_t)PdeDst.u));
3778 cErrors++;
3779 }
3780
3781 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3782 {
3783 AssertMsgFailed(("4K PDE reserved flags at %VGv! PdeDst=%#RX64\n",
3784 GCPtr, (uint64_t)PdeDst.u));
3785 cErrors++;
3786 }
3787
3788 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3789 if (!PdeSrc.n.u1Present)
3790 {
3791 AssertMsgFailed(("Guest PDE at %VGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3792 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3793 cErrors++;
3794 continue;
3795 }
3796
3797 if ( !PdeSrc.b.u1Size
3798 || !fBigPagesSupported)
3799 {
3800 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3801# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3802 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3803# endif
3804 }
3805 else
3806 {
3807# if PGM_GST_TYPE == PGM_TYPE_32BIT
3808 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3809 {
3810 AssertMsgFailed(("Guest PDE at %VGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3811 GCPtr, (uint64_t)PdeSrc.u));
3812 cErrors++;
3813 continue;
3814 }
3815# endif
3816 GCPhysGst = PdeSrc.u & GST_PDE_BIG_PG_MASK;
3817# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3818 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3819# endif
3820 }
3821
3822 if ( pPoolPage->enmKind
3823 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3824 {
3825 AssertMsgFailed(("Invalid shadow page table kind %d at %VGv! PdeSrc=%#RX64\n",
3826 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3827 cErrors++;
3828 }
3829
3830 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3831 if (!pPhysPage)
3832 {
3833 AssertMsgFailed(("Cannot find guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3834 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3835 cErrors++;
3836 continue;
3837 }
3838
3839 if (GCPhysGst != pPoolPage->GCPhys)
3840 {
3841 AssertMsgFailed(("GCPhysGst=%VGp != pPage->GCPhys=%VGp at %VGv\n",
3842 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3843 cErrors++;
3844 continue;
3845 }
3846
3847 if ( !PdeSrc.b.u1Size
3848 || !fBigPagesSupported)
3849 {
3850 /*
3851 * Page Table.
3852 */
3853 const GSTPT *pPTSrc;
3854 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3855 if (VBOX_FAILURE(rc))
3856 {
3857 AssertMsgFailed(("Cannot map/convert guest physical address %VGp in the PDE at %VGv! PdeSrc=%#RX64\n",
3858 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3859 cErrors++;
3860 continue;
3861 }
3862 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3863 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3864 {
3865 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3866 // (This problem will go away when/if we shadow multiple CR3s.)
3867 AssertMsgFailed(("4K PDE flags mismatch at %VGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3868 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3869 cErrors++;
3870 continue;
3871 }
3872 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3873 {
3874 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%VGv PdeDst=%#RX64\n",
3875 GCPtr, (uint64_t)PdeDst.u));
3876 cErrors++;
3877 continue;
3878 }
3879
3880 /* iterate the page table. */
3881# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3882 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3883 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3884# else
3885 const unsigned offPTSrc = 0;
3886# endif
3887 for (unsigned iPT = 0, off = 0;
3888 iPT < ELEMENTS(pPTDst->a);
3889 iPT++, off += PAGE_SIZE)
3890 {
3891 const SHWPTE PteDst = pPTDst->a[iPT];
3892
3893 /* skip not-present entries. */
3894 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3895 continue;
3896 Assert(PteDst.n.u1Present);
3897
3898 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3899 if (!PteSrc.n.u1Present)
3900 {
3901# ifdef IN_RING3
3902 PGMAssertHandlerAndFlagsInSync(pVM);
3903 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3904# endif
3905 AssertMsgFailed(("Out of sync (!P) PTE at %VGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%VGv iPTSrc=%x PdeSrc=%x physpte=%VGp\n",
3906 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3907 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3908 cErrors++;
3909 continue;
3910 }
3911
3912 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3913# if 1 /** @todo sync accessed bit properly... */
3914 fIgnoreFlags |= X86_PTE_A;
3915# endif
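 /* fIgnoreFlags collects the PTE bits we do not require to match between guest and shadow:
 * the physical address and AVL bits, plus G/D/PWT/PCD/PAT (and A for now) which are not shadowed 1:1. */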
3916
3917 /* match the physical addresses */
3918 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3919 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3920
3921# ifdef IN_RING3
3922 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3923 if (VBOX_FAILURE(rc))
3924 {
3925 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3926 {
3927 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3928 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3929 cErrors++;
3930 continue;
3931 }
3932 }
3933 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3934 {
3935 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3936 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3937 cErrors++;
3938 continue;
3939 }
3940# endif
3941
3942 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3943 if (!pPhysPage)
3944 {
3945# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3946 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3947 {
3948 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3949 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3950 cErrors++;
3951 continue;
3952 }
3953# endif
3954 if (PteDst.n.u1Write)
3955 {
3956 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3957 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3958 cErrors++;
3959 }
3960 fIgnoreFlags |= X86_PTE_RW;
3961 }
3962 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
3963 {
3964 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PteSrc=%#RX64 PteDst=%#RX64\n",
3965 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3966 cErrors++;
3967 continue;
3968 }
3969
3970 /* flags */
3971 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3972 {
3973 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3974 {
3975 if (PteDst.n.u1Write)
3976 {
3977 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3978 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3979 cErrors++;
3980 continue;
3981 }
3982 fIgnoreFlags |= X86_PTE_RW;
3983 }
3984 else
3985 {
3986 if (PteDst.n.u1Present)
3987 {
3988 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PteSrc=%#RX64 PteDst=%#RX64\n",
3989 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3990 cErrors++;
3991 continue;
3992 }
3993 fIgnoreFlags |= X86_PTE_P;
3994 }
3995 }
3996 else
3997 {
3998 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3999 {
4000 if (PteDst.n.u1Write)
4001 {
4002 AssertMsgFailed(("!DIRTY page at %VGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4003 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4004 cErrors++;
4005 continue;
4006 }
4007 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4008 {
4009 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4010 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4011 cErrors++;
4012 continue;
4013 }
4014 if (PteDst.n.u1Dirty)
4015 {
4016 AssertMsgFailed(("!DIRTY page at %VGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4017 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4018 cErrors++;
4019 }
4020# if 0 /** @todo sync access bit properly... */
4021 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4022 {
4023 AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4024 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4025 cErrors++;
4026 }
4027 fIgnoreFlags |= X86_PTE_RW;
4028# else
4029 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4030# endif
4031 }
4032 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4033 {
4034 /* access bit emulation (not implemented). */
4035 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4036 {
4037 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4038 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4039 cErrors++;
4040 continue;
4041 }
4042 if (!PteDst.n.u1Accessed)
4043 {
4044 AssertMsgFailed(("!ACCESSED page at %VGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4045 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4046 cErrors++;
4047 }
4048 fIgnoreFlags |= X86_PTE_P;
4049 }
4050# ifdef DEBUG_sandervl
4051 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4052# endif
4053 }
4054
4055 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4056 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4057 )
4058 {
4059 AssertMsgFailed(("Flags mismatch at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4060 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4061 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4062 cErrors++;
4063 continue;
4064 }
4065 } /* foreach PTE */
4066 }
4067 else
4068 {
4069 /*
4070 * Big Page.
4071 */
4072 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
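 /* As for 4K pages, mask out the PDE bits that are not required to match between the guest big-page PDE and the shadow PDE. */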
4073 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4074 {
4075 if (PdeDst.n.u1Write)
4076 {
4077 AssertMsgFailed(("!DIRTY page at %VGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4078 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4079 cErrors++;
4080 continue;
4081 }
4082 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4083 {
4084 AssertMsgFailed(("!DIRTY page at %VGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4085 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4086 cErrors++;
4087 continue;
4088 }
4089# if 0 /** @todo sync access bit properly... */
4090 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4091 {
4092 AssertMsgFailed(("!DIRTY page at %VGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4093 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4094 cErrors++;
4095 }
4096 fIgnoreFlags |= X86_PTE_RW;
4097# else
4098 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4099# endif
4100 }
4101 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4102 {
4103 /* access bit emulation (not implemented). */
4104 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4105 {
4106 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %VGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4107 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4108 cErrors++;
4109 continue;
4110 }
4111 if (!PdeDst.n.u1Accessed)
4112 {
4113 AssertMsgFailed(("!ACCESSED page at %VGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4114 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4115 cErrors++;
4116 }
4117 fIgnoreFlags |= X86_PTE_P;
4118 }
4119
4120 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4121 {
4122 AssertMsgFailed(("Flags mismatch (B) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4123 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4124 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4125 cErrors++;
4126 }
4127
4128 /* iterate the page table. */
4129 for (unsigned iPT = 0, off = 0;
4130 iPT < ELEMENTS(pPTDst->a);
4131 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4132 {
4133 const SHWPTE PteDst = pPTDst->a[iPT];
4134
4135 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4136 {
4137 AssertMsgFailed(("The PTE at %VGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4138 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4139 cErrors++;
4140 }
4141
4142 /* skip not-present entries. */
4143 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4144 continue;
4145
4146 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4147
4148 /* match the physical addresses */
4149 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4150
4151# ifdef IN_RING3
4152 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4153 if (VBOX_FAILURE(rc))
4154 {
4155 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4156 {
4157 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4158 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4159 cErrors++;
4160 }
4161 }
4162 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4163 {
4164 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4165 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4166 cErrors++;
4167 continue;
4168 }
4169# endif
4170 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4171 if (!pPhysPage)
4172 {
4173# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4174 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4175 {
4176 AssertMsgFailed(("Cannot find guest physical address %VGp at %VGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4177 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4178 cErrors++;
4179 continue;
4180 }
4181# endif
4182 if (PteDst.n.u1Write)
4183 {
4184 AssertMsgFailed(("Invalid guest page at %VGv is writable! GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4185 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4186 cErrors++;
4187 }
4188 fIgnoreFlags |= X86_PTE_RW;
4189 }
4190 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
4191 {
4192 AssertMsgFailed(("Out of sync (phys) at %VGv! HCPhysShw=%VHp HCPhys=%VHp GCPhysGst=%VGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4193 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4194 cErrors++;
4195 continue;
4196 }
4197
4198 /* flags */
4199 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4200 {
4201 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4202 {
4203 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4204 {
4205 if (PteDst.n.u1Write)
4206 {
4207 AssertMsgFailed(("WRITE access flagged at %VGv but the page is writable! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
4208 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4209 cErrors++;
4210 continue;
4211 }
4212 fIgnoreFlags |= X86_PTE_RW;
4213 }
4214 }
4215 else
4216 {
4217 if (PteDst.n.u1Present)
4218 {
4219 AssertMsgFailed(("ALL access flagged at %VGv but the page is present! HCPhys=%VHp PdeSrc=%#RX64 PteDst=%#RX64\n",
4220 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4221 cErrors++;
4222 continue;
4223 }
4224 fIgnoreFlags |= X86_PTE_P;
4225 }
4226 }
4227
4228 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4229 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4230 )
4231 {
4232 AssertMsgFailed(("Flags mismatch (BT) at %VGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4233 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4234 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4235 cErrors++;
4236 continue;
4237 }
4238 } /* for each PTE */
4239 }
4240 }
4241 /* not present */
4242
4243 } /* for each PDE */
4244
4245 } /* for each PDPTE */
4246
4247 } /* for each PML4E */
4248
4249# ifdef DEBUG
4250 if (cErrors)
4251 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4252# endif
4253
4254#endif
4255 return cErrors;
4256
4257#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED */
4258}
4259#endif /* VBOX_STRICT */
4260