VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 37608

Last change on this file since 37608 was 37354, checked in by vboxsync, 14 years ago

PGM: Fixed locking issues in PGMR3PhysMMIORegister and PGMR3PhysMMIODeregister. Also addressed a harmless one in PGMR3PhysRomRegister (only used at init time, so no races). Fortified the code with more lock assertions, replacing the incorrect PGMIsLocked() checks (we only care if the current thread is the lock owner). Cleaned up some ReturnStmt macros and added more of them.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 202.1 KB
Line 
/* $Id: PGMAllBth.h 37354 2011-06-07 15:05:32Z vboxsync $ */
/** @file
 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
 *
 * @remarks The nested page tables on AMD make use of PGM_SHW_TYPE in
 *          {PGM_TYPE_AMD64, PGM_TYPE_PAE and PGM_TYPE_32BIT} and PGM_GST_TYPE
 *          set to PGM_TYPE_PROT.  Half of the code in this file is not
 *          exercised with PGM_SHW_TYPE set to PGM_TYPE_NESTED.
 *
 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
 *          PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
 *
 * @remarks This file is one big \#ifdef-orgy!
 *
 */

/*
 * Copyright (C) 2006-2010 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


30/*******************************************************************************
31* Internal Functions *
32*******************************************************************************/
33RT_C_DECLS_BEGIN
34PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
35PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36static int PGM_BTH_NAME(SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
37static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
38static int PGM_BTH_NAME(SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
39#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
40static void PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
41#else
42static void PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
43#endif
44PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
45PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
46PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
47#ifdef VBOX_STRICT
48PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
49#endif
50PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
51PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
52RT_C_DECLS_END
53
54
/*
 * Filter out some illegal combinations of guest and shadow paging, so we can
 * remove redundant checks inside functions.
 *
 * Note: the conditions below also accept PGM_TYPE_NESTED and PGM_TYPE_EPT
 * shadow paging in every check (and PGM_TYPE_AMD64 for real/protected mode
 * guests); the #error texts only name the primary shadow modes.
 */

/* PAE guest: shadow must be PAE, nested or EPT. */
#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
# error "Invalid combination; PAE guest implies PAE shadow"
#endif

/* Real / protected mode (no guest paging): shadow must be 32-bit, PAE, AMD64, nested or EPT. */
#if     (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
    && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
#endif

/* 32-bit or PAE guest paging: shadow must be 32-bit, PAE, nested or EPT. */
#if     (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
    && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
#endif

/* AMD64 guest: shadow must be AMD64, nested or EPT.  AMD64 shadow: guest must be AMD64 or protected mode. */
#if    (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
    || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
#endif

78#ifndef IN_RING3
79
80# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
81/**
82 * Deal with a guest page fault.
83 *
84 * @returns Strict VBox status code.
85 * @retval VINF_EM_RAW_GUEST_TRAP
86 * @retval VINF_EM_RAW_EMULATE_INSTR
87 *
88 * @param pVCpu The current CPU.
89 * @param pGstWalk The guest page table walk result.
90 * @param uErr The error code.
91 */
92PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPU pVCpu, PGSTPTWALK pGstWalk, RTGCUINT uErr)
93{
94# if !defined(PGM_WITHOUT_MAPPINGS) && (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE)
95 /*
96 * Check for write conflicts with our hypervisor mapping.
97 *
98 * If the guest happens to access a non-present page, where our hypervisor
99 * is currently mapped, then we'll create a #PF storm in the guest.
100 */
101 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
102 && MMHyperIsInsideArea(pVCpu->CTX_SUFF(pVM), pGstWalk->Core.GCPtr))
103 {
104 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
105 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
106 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
107 return VINF_EM_RAW_EMULATE_INSTR;
108 }
109# endif
110
111 /*
112 * Calc the error code for the guest trap.
113 */
114 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
115 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
116 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
117 if (pGstWalk->Core.fBadPhysAddr)
118 {
119 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
120 Assert(!pGstWalk->Core.fNotPresent);
121 }
122 else if (!pGstWalk->Core.fNotPresent)
123 uNewErr |= X86_TRAP_PF_P;
124 TRPMSetErrorCode(pVCpu, uNewErr);
125
126 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pGstWalk->Core.GCPtr, uErr, pGstWalk->Core.uLevel));
127 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
128 return VINF_EM_RAW_GUEST_TRAP;
129}
130# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
131
132
133/**
134 * Deal with a guest page fault.
135 *
136 * The caller has taken the PGM lock.
137 *
138 * @returns Strict VBox status code.
139 *
140 * @param pVCpu The current CPU.
141 * @param uErr The error code.
142 * @param pRegFrame The register frame.
143 * @param pvFault The fault address.
144 * @param pPage The guest page at @a pvFault.
145 * @param pGstWalk The guest page table walk result.
146 * @param pfLockTaken PGM lock taken here or not (out). This is true
147 * when we're called.
148 */
149static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
150 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
151# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
152 , PGSTPTWALK pGstWalk
153# endif
154 )
155{
156# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
157 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
158#endif
159 PVM pVM = pVCpu->CTX_SUFF(pVM);
160 int rc;
161
162 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
163 {
164 /*
165 * Physical page access handler.
166 */
167# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
168 const RTGCPHYS GCPhysFault = pGstWalk->Core.GCPhys;
169# else
170 const RTGCPHYS GCPhysFault = (RTGCPHYS)pvFault;
171# endif
172 PPGMPHYSHANDLER pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
173 if (pCur)
174 {
175# ifdef PGM_SYNC_N_PAGES
176 /*
177 * If the region is write protected and we got a page not present fault, then sync
178 * the pages. If the fault was caused by a read, then restart the instruction.
179 * In case of write access continue to the GC write handler.
180 *
181 * ASSUMES that there is only one handler per page or that they have similar write properties.
182 */
183 if ( !(uErr & X86_TRAP_PF_P)
184 && pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE)
185 {
186# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
187 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
188# else
189 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
190# endif
191 if ( RT_FAILURE(rc)
192 || !(uErr & X86_TRAP_PF_RW)
193 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
194 {
195 AssertRC(rc);
196 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
197 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
198 return rc;
199 }
200 }
201# endif
202# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
203 /*
204 * If the access was not thru a #PF(RSVD|...) resync the page.
205 */
206 if ( !(uErr & X86_TRAP_PF_RSVD)
207 && pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
208# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
209 && pGstWalk->Core.fEffectiveRW
210 && !pGstWalk->Core.fEffectiveUS /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
211# endif
212 )
213 {
214# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
215 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
216# else
217 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
218# endif
219 if ( RT_FAILURE(rc)
220 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
221 {
222 AssertRC(rc);
223 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
224 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
225 return rc;
226 }
227 }
228# endif
229
230 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
231 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
232 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n",
233 pvFault, GCPhysFault, pPage, uErr, pCur->enmType));
234 if (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE)
235 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysWrite);
236 else
237 {
238 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysAll);
239 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysAllOpt);
240 }
241
242 if (pCur->CTX_SUFF(pfnHandler))
243 {
244 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
245 void *pvUser = pCur->CTX_SUFF(pvUser);
246# ifdef IN_RING0
247 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
248# else
249 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
250# endif
251
252 STAM_PROFILE_START(&pCur->Stat, h);
253 if (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler))
254 {
255 pgmUnlock(pVM);
256 *pfLockTaken = false;
257 }
258
259 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
260
261# ifdef VBOX_WITH_STATISTICS
262 pgmLock(pVM);
263 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
264 if (pCur)
265 STAM_PROFILE_STOP(&pCur->Stat, h);
266 pgmUnlock(pVM);
267# endif
268 }
269 else
270 rc = VINF_EM_RAW_EMULATE_INSTR;
271
272 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndPhys; });
273 return rc;
274 }
275 }
276# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
277 else
278 {
279# ifdef PGM_SYNC_N_PAGES
280 /*
281 * If the region is write protected and we got a page not present fault, then sync
282 * the pages. If the fault was caused by a read, then restart the instruction.
283 * In case of write access continue to the GC write handler.
284 */
285 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
286 && !(uErr & X86_TRAP_PF_P))
287 {
288 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
289 if ( RT_FAILURE(rc)
290 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
291 || !(uErr & X86_TRAP_PF_RW))
292 {
293 AssertRC(rc);
294 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
295 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndVirt; });
296 return rc;
297 }
298 }
299# endif
300 /*
301 * Ok, it's an virtual page access handler.
302 *
303 * Since it's faster to search by address, we'll do that first
304 * and then retry by GCPhys if that fails.
305 */
306 /** @todo r=bird: perhaps we should consider looking up by physical address directly now?
307 * r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be
308 * out of sync, because the page was changed without us noticing it (not-present -> present
309 * without invlpg or mov cr3, xxx).
310 */
311 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
312 if (pCur)
313 {
314 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
315 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
316 || !(uErr & X86_TRAP_PF_P)
317 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
318 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n",
319 pvFault, pGstWalk->Core.GCPhys, pPage, uErr, pCur->enmType));
320
321 if ( pvFault - pCur->Core.Key < pCur->cb
322 && ( uErr & X86_TRAP_PF_RW
323 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
324 {
325# ifdef IN_RC
326 STAM_PROFILE_START(&pCur->Stat, h);
327 RTGCPTR GCPtrStart = pCur->Core.Key;
328 CTX_MID(PFNPGM,VIRTHANDLER) pfnHandler = pCur->CTX_SUFF(pfnHandler);
329 pgmUnlock(pVM);
330 *pfLockTaken = false;
331
332 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPtrStart, pvFault - GCPtrStart);
333
334# ifdef VBOX_WITH_STATISTICS
335 pgmLock(pVM);
336 pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
337 if (pCur)
338 STAM_PROFILE_STOP(&pCur->Stat, h);
339 pgmUnlock(pVM);
340# endif
341# else
342 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
343# endif
344 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersVirtual);
345 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
346 return rc;
347 }
348 /* Unhandled part of a monitored page */
349 }
350 else
351 {
352 /* Check by physical address. */
353 unsigned iPage;
354 rc = pgmHandlerVirtualFindByPhysAddr(pVM, pGstWalk->Core.GCPhys, &pCur, &iPage);
355 Assert(RT_SUCCESS(rc) || !pCur);
356 if ( pCur
357 && ( uErr & X86_TRAP_PF_RW
358 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
359 {
360 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == (pGstWalk->Core.GCPhys & X86_PTE_PAE_PG_MASK));
361# ifdef IN_RC
362 STAM_PROFILE_START(&pCur->Stat, h);
363 RTGCPTR GCPtrStart = pCur->Core.Key;
364 CTX_MID(PFNPGM,VIRTHANDLER) pfnHandler = pCur->CTX_SUFF(pfnHandler);
365 pgmUnlock(pVM);
366 *pfLockTaken = false;
367
368 RTGCPTR off = (iPage << PAGE_SHIFT)
369 + (pvFault & PAGE_OFFSET_MASK)
370 - (GCPtrStart & PAGE_OFFSET_MASK);
371 Assert(off < pCur->cb);
372 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPtrStart, off);
373
374# ifdef VBOX_WITH_STATISTICS
375 pgmLock(pVM);
376 pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, GCPtrStart);
377 if (pCur)
378 STAM_PROFILE_STOP(&pCur->Stat, h);
379 pgmUnlock(pVM);
380# endif
381# else
382 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
383# endif
384 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersVirtualByPhys);
385 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
386 return rc;
387 }
388 }
389 }
390# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
391
392 /*
393 * There is a handled area of the page, but this fault doesn't belong to it.
394 * We must emulate the instruction.
395 *
396 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
397 * we first check if this was a page-not-present fault for a page with only
398 * write access handlers. Restart the instruction if it wasn't a write access.
399 */
400 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersUnhandled);
401
402 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
403 && !(uErr & X86_TRAP_PF_P))
404 {
405# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
406 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
407# else
408 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
409# endif
410 if ( RT_FAILURE(rc)
411 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
412 || !(uErr & X86_TRAP_PF_RW))
413 {
414 AssertRC(rc);
415 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
416 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
417 return rc;
418 }
419 }
420
421 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
422 * It's writing to an unhandled part of the LDT page several million times.
423 */
424 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
425 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
426 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndUnhandled; });
427 return rc;
428} /* if any kind of handler */
429
430
431/**
432 * #PF Handler for raw-mode guest execution.
433 *
434 * @returns VBox status code (appropriate for trap handling and GC return).
435 *
436 * @param pVCpu VMCPU Handle.
437 * @param uErr The trap error code.
438 * @param pRegFrame Trap register frame.
439 * @param pvFault The fault address.
440 * @param pfLockTaken PGM lock taken here or not (out)
441 */
442PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
443{
444 PVM pVM = pVCpu->CTX_SUFF(pVM);
445
446 *pfLockTaken = false;
447
448# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
449 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
450 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
451 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
452 int rc;
453
454# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
455 /*
456 * Walk the guest page translation tables and check if it's a guest fault.
457 */
458 GSTPTWALK GstWalk;
459 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &GstWalk);
460 if (RT_FAILURE_NP(rc))
461 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
462
463 /* assert some GstWalk sanity. */
464# if PGM_GST_TYPE == PGM_TYPE_AMD64
465 AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u));
466# endif
467# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
468 AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u));
469# endif
470 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
471 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
472 Assert(GstWalk.Core.fSucceeded);
473
474 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
475 {
476 if ( ( (uErr & X86_TRAP_PF_RW)
477 && !GstWalk.Core.fEffectiveRW
478 && ( (uErr & X86_TRAP_PF_US)
479 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
480 || ((uErr & X86_TRAP_PF_US) && !GstWalk.Core.fEffectiveUS)
481 || ((uErr & X86_TRAP_PF_ID) && GstWalk.Core.fEffectiveNX)
482 )
483 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
484 }
485
486 /*
487 * Set the accessed and dirty flags.
488 */
489# if PGM_GST_TYPE == PGM_TYPE_AMD64
490 GstWalk.Pml4e.u |= X86_PML4E_A;
491 GstWalk.pPml4e->u |= X86_PML4E_A;
492 GstWalk.Pdpe.u |= X86_PDPE_A;
493 GstWalk.pPdpe->u |= X86_PDPE_A;
494# endif
495 if (GstWalk.Core.fBigPage)
496 {
497 Assert(GstWalk.Pde.b.u1Size);
498 if (uErr & X86_TRAP_PF_RW)
499 {
500 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
501 GstWalk.pPde->u |= X86_PDE4M_A | X86_PDE4M_D;
502 }
503 else
504 {
505 GstWalk.Pde.u |= X86_PDE4M_A;
506 GstWalk.pPde->u |= X86_PDE4M_A;
507 }
508 }
509 else
510 {
511 Assert(!GstWalk.Pde.b.u1Size);
512 GstWalk.Pde.u |= X86_PDE_A;
513 GstWalk.pPde->u |= X86_PDE_A;
514 if (uErr & X86_TRAP_PF_RW)
515 {
516# ifdef VBOX_WITH_STATISTICS
517 if (!GstWalk.Pte.n.u1Dirty)
518 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtiedPage));
519 else
520 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageAlreadyDirty));
521# endif
522 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
523 GstWalk.pPte->u |= X86_PTE_A | X86_PTE_D;
524 }
525 else
526 {
527 GstWalk.Pte.u |= X86_PTE_A;
528 GstWalk.pPte->u |= X86_PTE_A;
529 }
530 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
531 }
532 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
533 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
534# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
535 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
536# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
537
538 /* Take the big lock now. */
539 *pfLockTaken = true;
540 pgmLock(pVM);
541
542# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
543 /*
544 * If it is a reserved bit fault we know that it is an MMIO (access
545 * handler) related fault and can skip some 200 lines of code.
546 */
547 if (uErr & X86_TRAP_PF_RSVD)
548 {
549 Assert(uErr & X86_TRAP_PF_P);
550 PPGMPAGE pPage;
551# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
552 rc = pgmPhysGetPageEx(pVM, GstWalk.Core.GCPhys, &pPage);
553 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
554 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
555 pfLockTaken, &GstWalk));
556 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
557# else
558 rc = pgmPhysGetPageEx(pVM, (RTGCPHYS)pvFault, &pPage);
559 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
560 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
561 pfLockTaken));
562 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
563# endif
564 AssertRC(rc);
565 PGM_INVL_PG(pVCpu, pvFault);
566 return rc; /* Restart with the corrected entry. */
567 }
568# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
569
570 /*
571 * Fetch the guest PDE, PDPE and PML4E.
572 */
573# if PGM_SHW_TYPE == PGM_TYPE_32BIT
574 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
575 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
576
577# elif PGM_SHW_TYPE == PGM_TYPE_PAE
578 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
579 PX86PDPAE pPDDst;
580# if PGM_GST_TYPE == PGM_TYPE_PAE
581 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
582# else
583 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
584# endif
585 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
586
587# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
588 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
589 PX86PDPAE pPDDst;
590# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
591 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
592 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
593# else
594 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
595# endif
596 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
597
598# elif PGM_SHW_TYPE == PGM_TYPE_EPT
599 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
600 PEPTPD pPDDst;
601 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
602 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
603# endif
604 Assert(pPDDst);
605
606# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
607 /*
608 * Dirty page handling.
609 *
610 * If we successfully correct the write protection fault due to dirty bit
611 * tracking, then return immediately.
612 */
613 if (uErr & X86_TRAP_PF_RW) /* write fault? */
614 {
615 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyBitTracking), a);
616 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
617 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyBitTracking), a);
618 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
619 {
620 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
621 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
622 ? &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2DirtyAndAccessed
623 : &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
624 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
625 return VINF_SUCCESS;
626 }
627 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
628 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
629 }
630
631# if 0 /* rarely useful; leave for debugging. */
632 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
633# endif
634# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
635
636 /*
637 * A common case is the not-present error caused by lazy page table syncing.
638 *
639 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
640 * here so we can safely assume that the shadow PT is present when calling
641 * SyncPage later.
642 *
643 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
644 * of mapping conflict and defer to SyncCR3 in R3.
645 * (Again, we do NOT support access handlers for non-present guest pages.)
646 *
647 */
648# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
649 Assert(GstWalk.Pde.n.u1Present);
650# endif
651 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
652 && !pPDDst->a[iPDDst].n.u1Present)
653 {
654 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2SyncPT; });
655# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
656 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
657 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
658# else
659 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
660 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
661# endif
662 if (RT_SUCCESS(rc))
663 return rc;
664 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
665 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
666 return VINF_PGM_SYNC_CR3;
667 }
668
669# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
670 /*
671 * Check if this address is within any of our mappings.
672 *
673 * This is *very* fast and it's gonna save us a bit of effort below and prevent
674 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
675 * (BTW, it's impossible to have physical access handlers in a mapping.)
676 */
677 if (pgmMapAreMappingsEnabled(pVM))
678 {
679 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
680 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
681 {
682 if (pvFault < pMapping->GCPtr)
683 break;
684 if (pvFault - pMapping->GCPtr < pMapping->cb)
685 {
686 /*
687 * The first thing we check is if we've got an undetected conflict.
688 */
689 if (pgmMapAreMappingsFloating(pVM))
690 {
691 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
692 while (iPT-- > 0)
693 if (GstWalk.pPde[iPT].n.u1Present)
694 {
695 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eConflicts);
696 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
697 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync,right? */
698 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
699 return VINF_PGM_SYNC_CR3;
700 }
701 }
702
703 /*
704 * Check if the fault address is in a virtual page access handler range.
705 */
706 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
707 if ( pCur
708 && pvFault - pCur->Core.Key < pCur->cb
709 && uErr & X86_TRAP_PF_RW)
710 {
711# ifdef IN_RC
712 STAM_PROFILE_START(&pCur->Stat, h);
713 pgmUnlock(pVM);
714 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
715 pgmLock(pVM);
716 STAM_PROFILE_STOP(&pCur->Stat, h);
717# else
718 AssertFailed();
719 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
720# endif
721 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersMapping);
722 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
723 return rc;
724 }
725
726 /*
727 * Pretend we're not here and let the guest handle the trap.
728 */
729 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
730 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eGuestPFMapping);
731 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
732 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
733 return VINF_EM_RAW_GUEST_TRAP;
734 }
735 }
736 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
737# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
738
739 /*
740 * Check if this fault address is flagged for special treatment,
741 * which means we'll have to figure out the physical address and
742 * check flags associated with it.
743 *
744 * ASSUME that we can limit any special access handling to pages
745 * in page tables which the guest believes to be present.
746 */
747# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
748 RTGCPHYS GCPhys = GstWalk.Core.GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
749# else
750 RTGCPHYS GCPhys = (RTGCPHYS)pvFault & ~(RTGCPHYS)PAGE_OFFSET_MASK;
751# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
752 PPGMPAGE pPage;
753 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
754 if (RT_FAILURE(rc))
755 {
756 /*
757 * When the guest accesses invalid physical memory (e.g. probing
758 * of RAM or accessing a remapped MMIO range), then we'll fall
759 * back to the recompiler to emulate the instruction.
760 */
761 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
762 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersInvalid);
763 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2InvalidPhys; });
764 return VINF_EM_RAW_EMULATE_INSTR;
765 }
766
767 /*
768 * Any handlers for this page?
769 */
770 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
771# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
772 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
773 &GstWalk));
774# else
775 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
776# endif
777
778 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTimeOutOfSync, c);
779
780# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
781 if (uErr & X86_TRAP_PF_P)
782 {
783 /*
784 * The page isn't marked, but it might still be monitored by a virtual page access handler.
785 * (ASSUMES no temporary disabling of virtual handlers.)
786 */
787 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
788 * we should correct both the shadow page table and physical memory flags, and not only check for
789 * accesses within the handler region but for access to pages with virtual handlers. */
790 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
791 if (pCur)
792 {
793 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
794 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
795 || !(uErr & X86_TRAP_PF_P)
796 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
797 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
798
799 if ( pvFault - pCur->Core.Key < pCur->cb
800 && ( uErr & X86_TRAP_PF_RW
801 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
802 {
803# ifdef IN_RC
804 STAM_PROFILE_START(&pCur->Stat, h);
805 pgmUnlock(pVM);
806 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
807 pgmLock(pVM);
808 STAM_PROFILE_STOP(&pCur->Stat, h);
809# else
810 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
811# endif
812 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
813 return rc;
814 }
815 }
816 }
817# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
818
819 /*
820 * We are here only if page is present in Guest page tables and
821 * trap is not handled by our handlers.
822 *
823 * Check it for page out-of-sync situation.
824 */
825 if (!(uErr & X86_TRAP_PF_P))
826 {
827 /*
828 * Page is not present in our page tables. Try to sync it!
829 */
830 if (uErr & X86_TRAP_PF_US)
831 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUser));
832 else /* supervisor */
833 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
834
835 if (PGM_PAGE_IS_BALLOONED(pPage))
836 {
837 /* Emulate reads from ballooned pages as they are not present in
838 our shadow page tables. (Required for e.g. Solaris guests; soft
839 ecc, random nr generator.) */
840 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
841 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
842 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncBallloon));
843 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Ballooned; });
844 return rc;
845 }
846
847# if defined(LOG_ENABLED) && !defined(IN_RING0)
848 RTGCPHYS GCPhys2;
849 uint64_t fPageGst2;
850 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
851# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
852 Log(("Page out of sync: %RGv eip=%08x PdeSrc.US=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
853 pvFault, pRegFrame->eip, GstWalk.Pde.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
854# else
855 Log(("Page out of sync: %RGv eip=%08x fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
856 pvFault, pRegFrame->eip, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
857# endif
858# endif /* LOG_ENABLED */
859
860# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
861 if ( !GstWalk.Core.fEffectiveUS
862 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
863 {
864 /* Note: Can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU. */
865 if ( pvFault == (RTGCPTR)pRegFrame->eip
866 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
867# ifdef CSAM_DETECT_NEW_CODE_PAGES
868 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
869 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
870# endif /* CSAM_DETECT_NEW_CODE_PAGES */
871 )
872 {
873 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
874 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
875 if (rc != VINF_SUCCESS)
876 {
877 /*
878 * CSAM needs to perform a job in ring 3.
879 *
880 * Sync the page before going to the host context; otherwise we'll end up in a loop if
881 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
882 */
883 LogFlow(("CSAM ring 3 job\n"));
884 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
885 AssertRC(rc2);
886
887 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2CSAM; });
888 return rc;
889 }
890 }
891# ifdef CSAM_DETECT_NEW_CODE_PAGES
892 else if ( uErr == X86_TRAP_PF_RW
893 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
894 && pRegFrame->ecx < 0x10000)
895 {
896 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
897 * to detect loading of new code pages.
898 */
899
900 /*
901 * Decode the instruction.
902 */
903 RTGCPTR PC;
904 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
905 &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
906 if (rc == VINF_SUCCESS)
907 {
908 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
909 uint32_t cbOp;
910 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
911
912 /* For now we'll restrict this to rep movsw/d instructions */
913 if ( rc == VINF_SUCCESS
914 && pDis->pCurInstr->opcode == OP_MOVSWD
915 && (pDis->prefix & PREFIX_REP))
916 {
917 CSAMMarkPossibleCodePage(pVM, pvFault);
918 }
919 }
920 }
921# endif /* CSAM_DETECT_NEW_CODE_PAGES */
922
923 /*
924 * Mark this page as safe.
925 */
926 /** @todo not correct for pages that contain both code and data!! */
927 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
928 CSAMMarkPage(pVM, pvFault, true);
929 }
930# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
931# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
932 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
933# else
934 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
935# endif
936 if (RT_SUCCESS(rc))
937 {
938 /* The page was successfully synced, return to the guest. */
939 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSync; });
940 return VINF_SUCCESS;
941 }
942 }
943 else /* uErr & X86_TRAP_PF_P: */
944 {
945 /*
946 * Write protected pages are made writable when the guest makes the
947 * first write to it. This happens for pages that are shared, write
948 * monitored or not yet allocated.
949 *
950 * We may also end up here when CR0.WP=0 in the guest.
951 *
952 * Also, a side effect of not flushing global PDEs are out of sync
953 * pages due to physical monitored regions, that are no longer valid.
954 * Assume for now it only applies to the read/write flag.
955 */
956 if (uErr & X86_TRAP_PF_RW)
957 {
958 /*
959 * Check if it is a read-only page.
960 */
961 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
962 {
963 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
964 Assert(!PGM_PAGE_IS_ZERO(pPage));
965 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
966 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2MakeWritable; });
967
968 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
969 if (rc != VINF_SUCCESS)
970 {
971 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
972 return rc;
973 }
974 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
975 return VINF_EM_NO_MEMORY;
976 }
977
978# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
979 /*
980 * Check to see if we need to emulate the instruction if CR0.WP=0.
981 */
982 if ( !GstWalk.Core.fEffectiveRW
983 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
984 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
985 {
986 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
987 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
988 if (RT_SUCCESS(rc))
989 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eWPEmulInRZ);
990 else
991 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eWPEmulToR3);
992 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2WPEmulation; });
993 return rc;
994 }
995# endif
996 /// @todo count the above case; else
997 if (uErr & X86_TRAP_PF_US)
998 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
999 else /* supervisor */
1000 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
1001
1002 /*
1003 * Sync the page.
1004 *
1005 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1006 * page is not present, which is not true in this case.
1007 */
1008# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1009 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
1010# else
1011 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
1012# endif
1013 if (RT_SUCCESS(rc))
1014 {
1015 /*
1016 * Page was successfully synced, return to guest but invalidate
1017 * the TLB first as the page is very likely to be in it.
1018 */
1019# if PGM_SHW_TYPE == PGM_TYPE_EPT
1020 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
1021# else
1022 PGM_INVL_PG(pVCpu, pvFault);
1023# endif
1024# ifdef VBOX_STRICT
1025 RTGCPHYS GCPhys2;
1026 uint64_t fPageGst;
1027 if (!pVM->pgm.s.fNestedPaging)
1028 {
1029 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
1030 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%Rrc fPageGst=%RX64\n", rc, fPageGst));
1031 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
1032 }
1033 uint64_t fPageShw;
1034 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
1035 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
1036 ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
1037# endif /* VBOX_STRICT */
1038 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndObs; });
1039 return VINF_SUCCESS;
1040 }
1041 }
1042 /** @todo else: why are we here? */
1043
1044# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
1045 /*
1046 * Check for VMM page flags vs. Guest page flags consistency.
1047 * Currently only for debug purposes.
1048 */
1049 if (RT_SUCCESS(rc))
1050 {
1051 /* Get guest page flags. */
1052 uint64_t fPageGst;
1053 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
1054 if (RT_SUCCESS(rc))
1055 {
1056 uint64_t fPageShw;
1057 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
1058
1059 /*
1060 * Compare page flags.
1061 * Note: we have AVL, A, D bits desynced.
1062 */
1063 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
1064 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
1065 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n",
1066 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
1067 }
1068 else
1069 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
1070 }
1071 else
1072 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
1073# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
1074 }
1075
1076
1077 /*
1078 * If we get here it is because something failed above, i.e. most likely guru
1079 * meditation time.
1080 */
1081 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
1082 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs, pRegFrame->rip));
1083 return rc;
1084
1085# else /* Nested paging, EPT except PGM_GST_TYPE = PROT */
1086 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1087 return VERR_INTERNAL_ERROR;
1088# endif
1089}
1090#endif /* !IN_RING3 */
1091
1092
1093/**
1094 * Emulation of the invlpg instruction.
1095 *
 * Brings the shadow paging structures covering GCPtrPage back in line with the
 * guest ones: either the single shadow PTE is resynced, or the shadow page
 * table behind the shadow PDE is freed and the shadow PDE is zeroed so that it
 * gets rebuilt lazily.
 *
 * The function asserts that the calling thread owns the PGM lock.
 *
 * When the guest has no paging, or the shadow mode is nested paging / EPT,
 * the whole body is compiled out and VINF_SUCCESS is returned.
1096 *
1097 * @returns VBox status code.
 *          VINF_SUCCESS when the invalidation is skipped (shadow PDPE/PDE not
 *          present, or, in ring-3 only, a pending CR3 sync) and when the big
 *          page turns out to be unchanged. Otherwise the status of SyncPT
 *          (mapping conflict case) or of mapping the guest page table.
1098 *
1099 * @param pVCpu The VMCPU handle.
1100 * @param GCPtrPage Page to invalidate.
1101 *
1102 * @remark ASSUMES that the guest is updating before invalidating. This order
1103 * isn't required by the CPU, so this is speculative and could cause
1104 * trouble.
1105 * @remark No TLB shootdown is done on any other VCPU as we assume that
1106 * invlpg emulation is the *only* reason for calling this function.
1107 * (The guest has to shoot down TLB entries on other CPUs itself)
1108 * Currently true, but keep in mind!
1109 *
1110 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1111 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1112 */
1113PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1114{
1115#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1116 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1117 && PGM_SHW_TYPE != PGM_TYPE_EPT
1118 int rc;
1119 PVM pVM = pVCpu->CTX_SUFF(pVM);
1120 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1121
1122 PGM_LOCK_ASSERT_OWNER(pVM);
1123
1124 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1125
1126 /*
1127 * Get the shadow PD entry and skip out if this PD isn't present.
1128 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1129 */
1130# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1131 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1132 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1133
1134 /* Fetch the pgm pool shadow descriptor. */
1135 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1136 Assert(pShwPde);
1137
1138# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1139 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1140 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1141
1142 /* If the shadow PDPE isn't present, then skip the invalidate. */
1143 if (!pPdptDst->a[iPdpt].n.u1Present)
1144 {
1145 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1146 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1147 return VINF_SUCCESS;
1148 }
1149
1150 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1151 PPGMPOOLPAGE pShwPde = NULL;
1152 PX86PDPAE pPDDst;
1153
1154 /* Fetch the pgm pool shadow descriptor. */
1155 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1156 AssertRCSuccessReturn(rc, rc);
1157 Assert(pShwPde);
1158
1159 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1160 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1161
1162# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1163 /* PML4 */
1164 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1165 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1166 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1167 PX86PDPAE pPDDst;
1168 PX86PDPT pPdptDst;
1169 PX86PML4E pPml4eDst;
1170 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1171 if (rc != VINF_SUCCESS)
1172 {
1173 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1174 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1175 return VINF_SUCCESS;
1176 }
1177 Assert(pPDDst);
1178
1179 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1180 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1181
1182 if (!pPdpeDst->n.u1Present)
1183 {
1184 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1185 return VINF_SUCCESS;
1186 }
1187
1188 /* Fetch the pgm pool shadow descriptor. */
1189 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1190 Assert(pShwPde);
1191
1192# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1193
 /* Work on a snapshot of the shadow PDE; if it is not present there is
    nothing shadowed for this address and the invalidate is skipped. */
1194 const SHWPDE PdeDst = *pPdeDst;
1195 if (!PdeDst.n.u1Present)
1196 {
1197 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1198 return VINF_SUCCESS;
1199 }
1200
1201 /*
1202 * Get the guest PD entry and calc big page.
1203 */
1204# if PGM_GST_TYPE == PGM_TYPE_32BIT
1205 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1206 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1207 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1208# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1209 unsigned iPDSrc = 0;
1210# if PGM_GST_TYPE == PGM_TYPE_PAE
1211 X86PDPE PdpeSrcIgn;
1212 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1213# else /* AMD64 */
1214 PX86PML4E pPml4eSrcIgn;
1215 X86PDPE PdpeSrcIgn;
1216 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1217# endif
1218 GSTPDE PdeSrc;
1219
 /* No guest page directory returned -> use an all-zero (not present) guest PDE. */
1220 if (pPDSrc)
1221 PdeSrc = pPDSrc->a[iPDSrc];
1222 else
1223 PdeSrc.u = 0;
1224# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1225 const bool fIsBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1226
1227# ifdef IN_RING3
1228 /*
1229 * If a CR3 Sync is pending we may ignore the invalidate page operation
1230 * depending on the kind of sync and if it's a global page or not.
1231 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1232 */
1233# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1234 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1235 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1236 && fIsBigPage
1237 && PdeSrc.b.u1Global
1238 )
1239 )
1240# else
1241 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1242# endif
1243 {
1244 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1245 return VINF_SUCCESS;
1246 }
1247# endif /* IN_RING3 */
1248
1249 /*
1250 * Deal with the Guest PDE.
1251 */
1252 rc = VINF_SUCCESS;
1253 if (PdeSrc.n.u1Present)
1254 {
1255 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1256 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1257# ifndef PGM_WITHOUT_MAPPING
1258 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1259 {
1260 /*
1261 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1262 */
1263 Assert(pgmMapAreMappingsEnabled(pVM));
1264 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1265 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1266 }
1267 else
1268# endif /* !PGM_WITHOUT_MAPPING */
1269 if (!fIsBigPage)
1270 {
1271 /*
1272 * 4KB - page.
1273 */
1274 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1275 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1276
1277# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1278 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1279 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1280# endif
 /* The shadow page table still shadows the guest page table at the same
    guest physical address: resync only the affected PTE and invalidate
    that single page. A failure to map the guest PT is kept in rc and
    returned at the end. */
1281 if (pShwPage->GCPhys == GCPhys)
1282 {
1283 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1284 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1285
1286 PGSTPT pPTSrc;
1287 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1288 if (RT_SUCCESS(rc))
1289 {
1290 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1291 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1292 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1293 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1294 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1295 GCPtrPage, PteSrc.n.u1Present,
1296 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1297 PteSrc.n.u1User & PdeSrc.n.u1User,
1298 (uint64_t)PteSrc.u,
1299 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1300 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1301 }
1302 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4KBPages));
1303 PGM_INVL_PG(pVCpu, GCPtrPage);
1304 }
1305 else
1306 {
1307 /*
1308 * The page table address changed.
1309 */
1310 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1311 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
 /* Release the stale shadow page table, zero the shadow PDE atomically
    and invalidate through PGM_INVL_VCPU_TLBS rather than a single page. */
1312 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1313 ASMAtomicWriteSize(pPdeDst, 0);
1314 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1315 PGM_INVL_VCPU_TLBS(pVCpu);
1316 }
1317 }
1318 else
1319 {
1320 /*
1321 * 2/4MB - page.
1322 */
1323 /* Before freeing the page, check if anything really changed. */
1324 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1325 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1326# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1327 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1328 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1329# endif
1330 if ( pShwPage->GCPhys == GCPhys
1331 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1332 {
1333 /* ASSUMES a the given bits are identical for 4M and normal PDEs */
1334 /** @todo This test is wrong as it cannot check the G bit!
1335 * FIXME */
1336 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1337 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1338 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1339 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1340 {
1341 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1342 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1343 return VINF_SUCCESS;
1344 }
1345 }
1346
1347 /*
1348 * Ok, the page table is present and it's been changed in the guest.
1349 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1350 * We could do this for some flushes in GC too, but we need an algorithm for
1351 * deciding which 4MB pages containing code likely to be executed very soon.
1352 */
1353 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1354 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1355 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1356 ASMAtomicWriteSize(pPdeDst, 0);
1357 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4MBPages));
1358 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1359 }
1360 }
1361 else
1362 {
1363 /*
1364 * Page directory is not present, mark shadow PDE not present.
1365 */
 /* Shadow PDEs flagged PGM_PDFLAGS_MAPPING are left untouched (only
    counted); any other shadow PDE is freed and zeroed. */
1366 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1367 {
1368 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1369 ASMAtomicWriteSize(pPdeDst, 0);
1370 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDNPs));
1371 PGM_INVL_PG(pVCpu, GCPtrPage);
1372 }
1373 else
1374 {
1375 Assert(pgmMapAreMappingsEnabled(pVM));
1376 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDMappings));
1377 }
1378 }
1379 return rc;
1380
1381#else /* guest real and protected mode */
1382 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1383 return VINF_SUCCESS;
1384#endif
1385}
1386
1387
1388/**
1389 * Update the tracking of shadowed pages.
1390 *
1391 * @param pVCpu The VMCPU handle.
1392 * @param pShwPage The shadow page.
1393 * @param HCPhys The physical page we is being dereferenced.
1394 * @param iPte Shadow PTE index
1395 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1396 */
1397DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte, RTGCPHYS GCPhysPage)
1398{
1399 PVM pVM = pVCpu->CTX_SUFF(pVM);
1400
1401# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1402 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1403 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1404
1405 /* Use the hint we retrieved from the cached guest PT. */
1406 if (pShwPage->fDirty)
1407 {
1408 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1409
1410 Assert(pShwPage->cPresent);
1411 Assert(pPool->cPresent);
1412 pShwPage->cPresent--;
1413 pPool->cPresent--;
1414
1415 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1416 AssertRelease(pPhysPage);
1417 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1418 return;
1419 }
1420# endif
1421
1422 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackDeref, a);
1423 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1424
1425 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1426 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1427 * 2. write protect all shadowed pages. I.e. implement caching.
1428 */
1429 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1430
1431 /*
1432 * Find the guest address.
1433 */
1434 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1435 pRam;
1436 pRam = pRam->CTX_SUFF(pNext))
1437 {
1438 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1439 while (iPage-- > 0)
1440 {
1441 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1442 {
1443 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1444
1445 Assert(pShwPage->cPresent);
1446 Assert(pPool->cPresent);
1447 pShwPage->cPresent--;
1448 pPool->cPresent--;
1449
1450 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1451 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackDeref, a);
1452 return;
1453 }
1454 }
1455 }
1456
1457 for (;;)
1458 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1459}
1460
1461
1462/**
1463 * Update the tracking of shadowed pages.
1464 *
1465 * @param pVCpu The VMCPU handle.
1466 * @param pShwPage The shadow page.
1467 * @param u16 The top 16-bit of the pPage->HCPhys.
1468 * @param pPage Pointer to the guest page. this will be modified.
1469 * @param iPTDst The index into the shadow table.
1470 */
1471DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1472{
1473 PVM pVM = pVCpu->CTX_SUFF(pVM);
1474
1475 /*
1476 * Just deal with the simple first time here.
1477 */
1478 if (!u16)
1479 {
1480 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackVirgin);
1481 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1482 /* Save the page table index. */
1483 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1484 }
1485 else
1486 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1487
1488 /* write back */
1489 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1490 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1491
1492 /* update statistics. */
1493 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1494 pShwPage->cPresent++;
1495 if (pShwPage->iFirstPresent > iPTDst)
1496 pShwPage->iFirstPresent = iPTDst;
1497}
1498
1499
1500/**
1501 * Modifies a shadow PTE to account for access handlers.
1502 *
1503 * @param pVM The VM handle.
1504 * @param pPage The page in question.
1505 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1506 * A (accessed) bit so it can be emulated correctly.
1507 * @param pPteDst The shadow PTE (output). This is temporary storage and
1508 * does not need to be set atomically.
1509 */
1510DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVM pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1511{
1512 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1513 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1514 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1515 {
1516 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1517#if PGM_SHW_TYPE == PGM_TYPE_EPT
1518 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage);
1519 pPteDst->n.u1Present = 1;
1520 pPteDst->n.u1Execute = 1;
1521 pPteDst->n.u1IgnorePAT = 1;
1522 pPteDst->n.u3EMT = VMX_EPT_MEMTYPE_WB;
1523 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1524#else
1525 if (fPteSrc & X86_PTE_A)
1526 {
1527 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1528 SHW_PTE_SET_RO(*pPteDst);
1529 }
1530 else
1531 SHW_PTE_SET(*pPteDst, 0);
1532#endif
1533 }
1534#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1535# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1536 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1537 && ( BTH_IS_NP_ACTIVE(pVM)
1538 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1539# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1540 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1541# endif
1542 )
1543 {
1544 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1545# if PGM_SHW_TYPE == PGM_TYPE_EPT
1546 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1547 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg;
1548 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1549 pPteDst->n.u1Present = 0;
1550 pPteDst->n.u1Write = 1;
1551 pPteDst->n.u1Execute = 0;
1552 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1553 pPteDst->n.u3EMT = 7;
1554# else
1555 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1556 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1557# endif
1558 }
1559# endif
1560#endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1561 else
1562 {
1563 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1564 SHW_PTE_SET(*pPteDst, 0);
1565 }
1566 /** @todo count these kinds of entries. */
1567}
1568
1569
1570/**
1571 * Creates a 4K shadow page for a guest page.
1572 *
1573 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1574 * physical address. Only the flags of the PdeSrc argument are used. No page
1575 * structures will be mapped in this function.
1576 *
1577 * @param pVCpu The VMCPU handle.
1578 * @param pPteDst Destination page table entry.
1579 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1580 * Can safely assume that only the flags are being used.
1581 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1582 * @param pShwPage Pointer to the shadow page.
1583 * @param iPTDst The index into the shadow table.
1584 *
1585 * @remark Not used for 2/4MB pages!
1586 */
1587#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1588static void PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1589 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1590#else
1591static void PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1592#endif
1593{
1594 PVM pVM = pVCpu->CTX_SUFF(pVM);
1595 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1596
1597#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1598 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1599 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1600
1601 if (pShwPage->fDirty)
1602 {
1603 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1604 PGSTPT pGstPT;
1605
1606 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1607 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirty].aPage[0];
1608 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1609 pGstPT->a[iPTDst].u = PteSrc.u;
1610 }
1611#else
1612 Assert(!pShwPage->fDirty);
1613#endif
1614
1615#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1616 if ( PteSrc.n.u1Present
1617 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1618#endif
1619 {
1620# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1621 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1622# endif
1623 /*
1624 * Find the ram range.
1625 */
1626 PPGMPAGE pPage;
1627 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1628 if (RT_SUCCESS(rc))
1629 {
1630 /* Ignore ballooned pages.
1631 Don't return errors or use a fatal assert here as part of a
1632 shadow sync range might included ballooned pages. */
1633 if (PGM_PAGE_IS_BALLOONED(pPage))
1634 {
1635 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1636 return;
1637 }
1638
1639#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1640 /* Make the page writable if necessary. */
1641 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1642 && ( PGM_PAGE_IS_ZERO(pPage)
1643# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1644 || ( PteSrc.n.u1Write
1645# else
1646 || ( 1
1647# endif
1648 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1649# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1650 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1651# endif
1652# ifdef VBOX_WITH_PAGE_SHARING
1653 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1654# endif
1655 )
1656 )
1657 )
1658 {
1659 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1660 AssertRC(rc);
1661 }
1662#endif
1663
1664 /*
1665 * Make page table entry.
1666 */
1667 SHWPTE PteDst;
1668# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1669 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1670# else
1671 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1672# endif
1673 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1674 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1675 else
1676 {
1677#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1678 /*
1679 * If the page or page directory entry is not marked accessed,
1680 * we mark the page not present.
1681 */
1682 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1683 {
1684 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1685 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,AccessedPage));
1686 SHW_PTE_SET(PteDst, 0);
1687 }
1688 /*
1689 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1690 * when the page is modified.
1691 */
1692 else if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1693 {
1694 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPage));
1695 SHW_PTE_SET(PteDst,
1696 fGstShwPteFlags
1697 | PGM_PAGE_GET_HCPHYS(pPage)
1698 | PGM_PTFLAGS_TRACK_DIRTY);
1699 SHW_PTE_SET_RO(PteDst);
1700 }
1701 else
1702#endif
1703 {
1704 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageSkipped));
1705#if PGM_SHW_TYPE == PGM_TYPE_EPT
1706 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1707 PteDst.n.u1Present = 1;
1708 PteDst.n.u1Write = 1;
1709 PteDst.n.u1Execute = 1;
1710 PteDst.n.u1IgnorePAT = 1;
1711 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1712 /* PteDst.n.u1Size = 0 */
1713#else
1714 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1715#endif
1716 }
1717
1718 /*
1719 * Make sure only allocated pages are mapped writable.
1720 */
1721 if ( SHW_PTE_IS_P_RW(PteDst)
1722 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1723 {
1724 /* Still applies to shared pages. */
1725 Assert(!PGM_PAGE_IS_ZERO(pPage));
1726 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why, isn't it? */
1727 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1728 }
1729 }
1730
1731 /*
1732 * Keep user track up to date.
1733 */
1734 if (SHW_PTE_IS_P(PteDst))
1735 {
1736 if (!SHW_PTE_IS_P(*pPteDst))
1737 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1738 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1739 {
1740 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1741 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1742 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1743 }
1744 }
1745 else if (SHW_PTE_IS_P(*pPteDst))
1746 {
1747 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1748 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1749 }
1750
1751 /*
1752 * Update statistics and commit the entry.
1753 */
1754#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1755 if (!PteSrc.n.u1Global)
1756 pShwPage->fSeenNonGlobal = true;
1757#endif
1758 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1759 return;
1760 }
1761
1762/** @todo count these three different kinds. */
1763 Log2(("SyncPageWorker: invalid address in Pte\n"));
1764 }
1765#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1766 else if (!PteSrc.n.u1Present)
1767 Log2(("SyncPageWorker: page not present in Pte\n"));
1768 else
1769 Log2(("SyncPageWorker: invalid Pte\n"));
1770#endif
1771
1772 /*
1773 * The page is not present or the PTE is bad. Replace the shadow PTE by
1774 * an empty entry, making sure to keep the user tracking up to date.
1775 */
1776 if (SHW_PTE_IS_P(*pPteDst))
1777 {
1778 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1779 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1780 }
1781 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1782}
1783
1784
1785/**
1786 * Syncs a guest OS page.
1787 *
1788 * There are no conflicts at this point, neither is there any need for
1789 * page table allocations.
1790 *
1791 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1792 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1793 *
1794 * @returns VBox status code.
1795 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1796 * @param pVCpu The VMCPU handle.
1797 * @param PdeSrc Page directory entry of the guest.
1798 * @param GCPtrPage Guest context page address.
1799 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1800 * @param uErr Fault error (X86_TRAP_PF_*).
1801 */
1802static int PGM_BTH_NAME(SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1803{
1804 PVM pVM = pVCpu->CTX_SUFF(pVM);
1805 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1806 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1807
1808 PGM_LOCK_ASSERT_OWNER(pVM);
1809
1810#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1811 || PGM_GST_TYPE == PGM_TYPE_PAE \
1812 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1813 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1814 && PGM_SHW_TYPE != PGM_TYPE_EPT
1815
1816 /*
1817 * Assert preconditions.
1818 */
1819 Assert(PdeSrc.n.u1Present);
1820 Assert(cPages);
1821# if 0 /* rarely useful; leave for debugging. */
1822 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1823# endif
1824
1825 /*
1826 * Get the shadow PDE, find the shadow page table in the pool.
1827 */
1828# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1829 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1830 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1831
1832 /* Fetch the pgm pool shadow descriptor. */
1833 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1834 Assert(pShwPde);
1835
1836# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1837 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1838 PPGMPOOLPAGE pShwPde = NULL;
1839 PX86PDPAE pPDDst;
1840
1841 /* Fetch the pgm pool shadow descriptor. */
1842 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1843 AssertRCSuccessReturn(rc2, rc2);
1844 Assert(pShwPde);
1845
1846 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1847 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1848
1849# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1850 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1851 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1852 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1853 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1854
1855 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1856 AssertRCSuccessReturn(rc2, rc2);
1857 Assert(pPDDst && pPdptDst);
1858 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1859# endif
1860 SHWPDE PdeDst = *pPdeDst;
1861
1862 /*
1863 * - In the guest SMP case we could have blocked while another VCPU reused
1864 * this page table.
1865 * - With W7-64 we may also take this path when the the A bit is cleared on
1866 * higher level tables (PDPE/PML4E). The guest does not invalidate the
1867 * relevant TLB entries. If we're write monitoring any page mapped by
1868 * the modified entry, we may end up here with a "stale" TLB entry.
1869 */
1870 if (!PdeDst.n.u1Present)
1871 {
1872 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1873 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
1874 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1875 if (uErr & X86_TRAP_PF_P)
1876 PGM_INVL_PG(pVCpu, GCPtrPage);
1877 return VINF_SUCCESS; /* force the instruction to be executed again. */
1878 }
1879
1880 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1881 Assert(pShwPage);
1882
1883# if PGM_GST_TYPE == PGM_TYPE_AMD64
1884 /* Fetch the pgm pool shadow descriptor. */
1885 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1886 Assert(pShwPde);
1887# endif
1888
1889 /*
1890 * Check that the page is present and that the shadow PDE isn't out of sync.
1891 */
1892 const bool fBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1893 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1894 RTGCPHYS GCPhys;
1895 if (!fBigPage)
1896 {
1897 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1898# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1899 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1900 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1901# endif
1902 }
1903 else
1904 {
1905 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1906# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1907 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1908 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1909# endif
1910 }
1911 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
1912 if ( fPdeValid
1913 && pShwPage->GCPhys == GCPhys
1914 && PdeSrc.n.u1Present
1915 && PdeSrc.n.u1User == PdeDst.n.u1User
1916 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1917# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1918 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !GST_IS_NX_ACTIVE(pVCpu))
1919# endif
1920 )
1921 {
1922 /*
1923 * Check that the PDE is marked accessed already.
1924 * Since we set the accessed bit *before* getting here on a #PF, this
1925 * check is only meant for dealing with non-#PF'ing paths.
1926 */
1927 if (PdeSrc.n.u1Accessed)
1928 {
1929 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1930 if (!fBigPage)
1931 {
1932 /*
1933 * 4KB Page - Map the guest page table.
1934 */
1935 PGSTPT pPTSrc;
1936 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1937 if (RT_SUCCESS(rc))
1938 {
1939# ifdef PGM_SYNC_N_PAGES
1940 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1941 if ( cPages > 1
1942 && !(uErr & X86_TRAP_PF_P)
1943 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1944 {
1945 /*
1946 * This code path is currently only taken when the caller is PGMTrap0eHandler
1947 * for non-present pages!
1948 *
1949 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1950 * deal with locality.
1951 */
1952 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1953 const unsigned iPTDstPage = iPTDst;
1954# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1955 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1956 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1957# else
1958 const unsigned offPTSrc = 0;
1959# endif
1960 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1961 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1962 iPTDst = 0;
1963 else
1964 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1965
1966 for (; iPTDst < iPTDstEnd; iPTDst++)
1967 {
1968 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
1969
1970 if ( pPteSrc->n.u1Present
1971 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1972 {
1973 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1974 NOREF(GCPtrCurPage);
1975#ifndef IN_RING0
1976 /*
1977 * Assuming kernel code will be marked as supervisor - and not as user level
1978 * and executed using a conforming code selector - And marked as readonly.
1979 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1980 */
1981 PPGMPAGE pPage;
1982 if ( ((PdeSrc.u & pPteSrc->u) & (X86_PTE_RW | X86_PTE_US))
1983 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1984 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1985 || ( (pPage = pgmPhysGetPage(pVM, pPteSrc->u & GST_PTE_PG_MASK))
1986 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1987 )
1988#endif /* else: CSAM not active */
1989 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
1990 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1991 GCPtrCurPage, pPteSrc->n.u1Present,
1992 pPteSrc->n.u1Write & PdeSrc.n.u1Write,
1993 pPteSrc->n.u1User & PdeSrc.n.u1User,
1994 (uint64_t)pPteSrc->u,
1995 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1996 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1997 }
1998 }
1999 }
2000 else
2001# endif /* PGM_SYNC_N_PAGES */
2002 {
2003 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
2004 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2005 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2006 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2007 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
2008 GCPtrPage, PteSrc.n.u1Present,
2009 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2010 PteSrc.n.u1User & PdeSrc.n.u1User,
2011 (uint64_t)PteSrc.u,
2012 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2013 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2014 }
2015 }
2016 else /* MMIO or invalid page: emulated in #PF handler. */
2017 {
2018 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
2019 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
2020 }
2021 }
2022 else
2023 {
2024 /*
2025 * 4/2MB page - lazy syncing shadow 4K pages.
2026 * (There are many causes of getting here, it's no longer only CSAM.)
2027 */
2028 /* Calculate the GC physical address of this 4KB shadow page. */
2029 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
2030 /* Find ram range. */
2031 PPGMPAGE pPage;
2032 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
2033 if (RT_SUCCESS(rc))
2034 {
2035 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2036
2037# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2038 /* Try to make the page writable if necessary. */
2039 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2040 && ( PGM_PAGE_IS_ZERO(pPage)
2041 || ( PdeSrc.n.u1Write
2042 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2043# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2044 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2045# endif
2046# ifdef VBOX_WITH_PAGE_SHARING
2047 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2048# endif
2049 )
2050 )
2051 )
2052 {
2053 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2054 AssertRC(rc);
2055 }
2056# endif
2057
2058 /*
2059 * Make shadow PTE entry.
2060 */
2061 SHWPTE PteDst;
2062 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2063 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
2064 else
2065 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
2066
2067 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2068 if ( SHW_PTE_IS_P(PteDst)
2069 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2070 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2071
2072 /* Make sure only allocated pages are mapped writable. */
2073 if ( SHW_PTE_IS_P_RW(PteDst)
2074 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2075 {
2076 /* Still applies to shared pages. */
2077 Assert(!PGM_PAGE_IS_ZERO(pPage));
2078 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2079 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2080 }
2081
2082 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
2083
2084 /*
2085 * If the page is not flagged as dirty and is writable, then make it read-only
2086 * at PD level, so we can set the dirty bit when the page is modified.
2087 *
2088 * ASSUMES that page access handlers are implemented on page table entry level.
2089 * Thus we will first catch the dirty access and set PDE.D and restart. If
2090 * there is an access handler, we'll trap again and let it work on the problem.
2091 */
2092 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2093 * As for invlpg, it simply frees the whole shadow PT.
2094 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2095 if ( !PdeSrc.b.u1Dirty
2096 && PdeSrc.b.u1Write)
2097 {
2098 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2099 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2100 PdeDst.n.u1Write = 0;
2101 }
2102 else
2103 {
2104 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2105 PdeDst.n.u1Write = PdeSrc.n.u1Write;
2106 }
2107 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2108 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2109 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
2110 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2111 }
2112 else
2113 {
2114 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2115 /** @todo must wipe the shadow page table entry in this
2116 * case. */
2117 }
2118 }
2119 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2120 return VINF_SUCCESS;
2121 }
2122
2123 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPagePDNAs));
2124 }
2125 else if (fPdeValid)
2126 {
2127 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2128 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2129 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2130 }
2131 else
2132 {
2133/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
2134 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2135 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2136 }
2137
2138 /*
2139 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2140 * Yea, I'm lazy.
2141 */
2142 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2143 ASMAtomicWriteSize(pPdeDst, 0);
2144
2145 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2146 PGM_INVL_VCPU_TLBS(pVCpu);
2147 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2148
2149
2150#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2151 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2152 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2153 && !defined(IN_RC)
2154
2155# ifdef PGM_SYNC_N_PAGES
2156 /*
2157 * Get the shadow PDE, find the shadow page table in the pool.
2158 */
2159# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2160 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2161
2162# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2163 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2164
2165# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2166 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2167 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2168 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2169 X86PDEPAE PdeDst;
2170 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2171
2172 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2173 AssertRCSuccessReturn(rc, rc);
2174 Assert(pPDDst && pPdptDst);
2175 PdeDst = pPDDst->a[iPDDst];
2176# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2177 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2178 PEPTPD pPDDst;
2179 EPTPDE PdeDst;
2180
2181 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2182 if (rc != VINF_SUCCESS)
2183 {
2184 AssertRC(rc);
2185 return rc;
2186 }
2187 Assert(pPDDst);
2188 PdeDst = pPDDst->a[iPDDst];
2189# endif
2190 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2191 if (!PdeDst.n.u1Present)
2192 {
2193 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2194 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2195 return VINF_SUCCESS; /* force the instruction to be executed again. */
2196 }
2197
2198 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2199 if (PdeDst.n.u1Size)
2200 {
2201 Assert(pVM->pgm.s.fNestedPaging);
2202 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2203 return VINF_SUCCESS;
2204 }
2205
2206 /* Mask away the page offset. */
2207 GCPtrPage &= ~((RTGCPTR)0xfff);
2208
2209 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2210 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2211
2212 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2213 if ( cPages > 1
2214 && !(uErr & X86_TRAP_PF_P)
2215 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2216 {
2217 /*
2218 * This code path is currently only taken when the caller is PGMTrap0eHandler
2219 * for non-present pages!
2220 *
2221 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2222 * deal with locality.
2223 */
2224 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2225 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2226 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2227 iPTDst = 0;
2228 else
2229 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2230 for (; iPTDst < iPTDstEnd; iPTDst++)
2231 {
2232 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2233 {
2234 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2235
2236 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2237 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2238 GCPtrCurPage,
2239 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2240 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2241
2242 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2243 break;
2244 }
2245 else
2246 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2247 }
2248 }
2249 else
2250# endif /* PGM_SYNC_N_PAGES */
2251 {
2252 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2253 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2254
2255 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2256
2257 Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1}PteDst=%08llx%s\n",
2258 GCPtrPage,
2259 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2260 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2261 }
2262 return VINF_SUCCESS;
2263
2264#else
2265 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2266 return VERR_INTERNAL_ERROR;
2267#endif
2268}
2269
2270
2271#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2272
2273/**
2274 * CheckPageFault helper for returning a page fault indicating a non-present
2275 * (NP) entry in the page translation structures.
2276 *
2277 * @returns VINF_EM_RAW_GUEST_TRAP.
2278 * @param pVCpu The virtual CPU to operate on.
2279 * @param uErr The error code of the shadow fault. Corrections to
2280 * TRPM's copy will be made if necessary.
2281 * @param GCPtrPage For logging.
2282 * @param uPageFaultLevel For logging.
2283 */
2284DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2285{
2286 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2287 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2288 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2289 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2290 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2291
2292 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2293 return VINF_EM_RAW_GUEST_TRAP;
2294}
2295
2296
2297/**
2298 * CheckPageFault helper for returning a page fault indicating a reserved bit
2299 * (RSVD) error in the page translation structures.
2300 *
2301 * @returns VINF_EM_RAW_GUEST_TRAP.
2302 * @param pVCpu The virtual CPU to operate on.
2303 * @param uErr The error code of the shadow fault. Corrections to
2304 * TRPM's copy will be made if necessary.
2305 * @param GCPtrPage For logging.
2306 * @param uPageFaultLevel For logging.
2307 */
2308DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2309{
2310 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2311 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2312 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2313
2314 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2315 return VINF_EM_RAW_GUEST_TRAP;
2316}
2317
2318
2319/**
2320 * CheckPageFault helper for returning a page protection fault (P).
2321 *
2322 * @returns VINF_EM_RAW_GUEST_TRAP.
2323 * @param pVCpu The virtual CPU to operate on.
2324 * @param uErr The error code of the shadow fault. Corrections to
2325 * TRPM's copy will be made if necessary.
2326 * @param GCPtrPage For logging.
2327 * @param uPageFaultLevel For logging.
2328 */
2329DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2330{
2331 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2332 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2333 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2334 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2335
2336 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2337 return VINF_EM_RAW_GUEST_TRAP;
2338}
2339
2340
2341/**
2342 * Handle dirty bit tracking faults.
2343 *
2344 * @returns VBox status code.
2345 * @param pVCpu The VMCPU handle.
2346 * @param uErr Page fault error code.
2347 * @param pPdeSrc Guest page directory entry.
2348 * @param pPdeDst Shadow page directory entry.
2349 * @param GCPtrPage Guest context page address.
2350 */
2351static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage)
2352{
2353 PVM pVM = pVCpu->CTX_SUFF(pVM);
2354 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2355
2356 PGM_LOCK_ASSERT_OWNER(pVM);
2357
2358 /*
2359 * Handle big page.
2360 */
2361 if (pPdeSrc->b.u1Size && GST_IS_PSE_ACTIVE(pVCpu))
2362 {
2363 if ( pPdeDst->n.u1Present
2364 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2365 {
2366 SHWPDE PdeDst = *pPdeDst;
2367
2368 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageTrap));
2369 Assert(pPdeSrc->b.u1Write);
2370
2371 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2372 * fault again and take this path to only invalidate the entry (see below).
2373 */
2374 PdeDst.n.u1Write = 1;
2375 PdeDst.n.u1Accessed = 1;
2376 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2377 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2378 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2379 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2380 }
2381
2382# ifdef IN_RING0
2383 /* Check for stale TLB entry; only applies to the SMP guest case. */
2384 if ( pVM->cCpus > 1
2385 && pPdeDst->n.u1Write
2386 && pPdeDst->n.u1Accessed)
2387 {
2388 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2389 if (pShwPage)
2390 {
2391 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2392 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2393 if (SHW_PTE_IS_P_RW(*pPteDst))
2394 {
2395 /* Stale TLB entry. */
2396 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageStale));
2397 PGM_INVL_PG(pVCpu, GCPtrPage);
2398 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2399 }
2400 }
2401 }
2402# endif /* IN_RING0 */
2403 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2404 }
2405
2406 /*
2407 * Map the guest page table.
2408 */
2409 PGSTPT pPTSrc;
2410 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2411 if (RT_FAILURE(rc))
2412 {
2413 AssertRC(rc);
2414 return rc;
2415 }
2416
2417 if (pPdeDst->n.u1Present)
2418 {
2419 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2420 const GSTPTE PteSrc = *pPteSrc;
2421
2422#ifndef IN_RING0
2423 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2424 * Our individual shadow handlers will provide more information and force a fatal exit.
2425 */
2426 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2427 {
2428 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2429 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2430 }
2431#endif
2432 /*
2433 * Map shadow page table.
2434 */
2435 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2436 if (pShwPage)
2437 {
2438 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2439 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2440 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2441 {
2442 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2443 {
2444 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(*pPteSrc));
2445 SHWPTE PteDst = *pPteDst;
2446
2447 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2448 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageTrap));
2449
2450 Assert(pPteSrc->n.u1Write);
2451
2452 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2453 * entry will not harm; write access will simply fault again and
2454 * take this path to only invalidate the entry.
2455 */
2456 if (RT_LIKELY(pPage))
2457 {
2458 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2459 {
2460 AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2461 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2462 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2463 SHW_PTE_SET_RO(PteDst);
2464 }
2465 else
2466 {
2467 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2468 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2469 {
2470 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(*pPteSrc));
2471 AssertRC(rc);
2472 }
2473 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2474 SHW_PTE_SET_RW(PteDst);
2475 else
2476 {
2477 /* Still applies to shared pages. */
2478 Assert(!PGM_PAGE_IS_ZERO(pPage));
2479 SHW_PTE_SET_RO(PteDst);
2480 }
2481 }
2482 }
2483 else
2484 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2485
2486 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2487 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2488 PGM_INVL_PG(pVCpu, GCPtrPage);
2489 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2490 }
2491
2492# ifdef IN_RING0
2493 /* Check for stale TLB entry; only applies to the SMP guest case. */
2494 if ( pVM->cCpus > 1
2495 && SHW_PTE_IS_RW(*pPteDst)
2496 && SHW_PTE_IS_A(*pPteDst))
2497 {
2498 /* Stale TLB entry. */
2499 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageStale));
2500 PGM_INVL_PG(pVCpu, GCPtrPage);
2501 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2502 }
2503# endif
2504 }
2505 }
2506 else
2507 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2508 }
2509
2510 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2511}
2512
2513#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2514
2515
2516/**
2517 * Sync a shadow page table.
2518 *
2519 * The shadow page table is not present in the shadow PDE.
2520 *
2521 * Handles mapping conflicts.
2522 *
2523 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2524 * conflict), and Trap0eHandler.
2525 *
2526 * A precondition for this method is that the shadow PDE is not present. The
2527 * caller must take the PGM lock before checking this and continue to hold it
2528 * when calling this method.
2529 *
2530 * @returns VBox status code.
 * @retval  VERR_ADDRESS_CONFLICT in the paged-guest variant, outside ring-3,
 *          when the shadow PDE carries PGM_PDFLAGS_MAPPING (only when
 *          PGM_WITHOUT_MAPPINGS is not defined).
 * @retval  VINF_PGM_SYNC_CR3 in the paged-guest variant when the pool
 *          allocation returns VERR_PGM_POOL_FLUSHED; VMCPU_FF_PGM_SYNC_CR3 is
 *          set before returning.
2531 * @param pVCpu The VMCPU handle.
2532 * @param iPDSrc Index into the guest page directory pPDSrc.
2533 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2534 * Assume this is a temporary mapping.
2535 * @param GCPtrPage GC Pointer of the page that caused the fault
2536 */
2537static int PGM_BTH_NAME(SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2538{
2539 PVM pVM = pVCpu->CTX_SUFF(pVM);
2540 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2541
2542#if 0 /* rarely useful; leave for debugging. */
2543 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2544#endif
2545 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2546
2547 PGM_LOCK_ASSERT_OWNER(pVM);
2548
2549#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2550 || PGM_GST_TYPE == PGM_TYPE_PAE \
2551 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2552 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2553 && PGM_SHW_TYPE != PGM_TYPE_EPT
2554
2555 int rc = VINF_SUCCESS;
2556
2557 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2558
2559 /*
2560 * Some input validation first.
2561 */
2562 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2563
2564 /*
2565 * Get the relevant shadow PDE entry.
2566 */
2567# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2568 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2569 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2570
2571 /* Fetch the pgm pool shadow descriptor. */
2572 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2573 Assert(pShwPde);
2574
2575# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2576 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2577 PPGMPOOLPAGE pShwPde = NULL;
2578 PX86PDPAE pPDDst;
2579 PSHWPDE pPdeDst;
2580
2581 /* Fetch the pgm pool shadow descriptor. */
2582 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2583 AssertRCSuccessReturn(rc, rc);
2584 Assert(pShwPde);
2585
2586 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2587 pPdeDst = &pPDDst->a[iPDDst];
2588
2589# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2590 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2591 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2592 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2593 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2594 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2595 AssertRCSuccessReturn(rc, rc);
2596 Assert(pPDDst);
2597 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2598# endif
 /* Work on a local copy of the shadow PDE; it is written back with ASMAtomicWriteSize further down. */
2599 SHWPDE PdeDst = *pPdeDst;
2600
2601# if PGM_GST_TYPE == PGM_TYPE_AMD64
2602 /* Fetch the pgm pool shadow descriptor. */
2603 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2604 Assert(pShwPde);
2605# endif
2606
2607# ifndef PGM_WITHOUT_MAPPINGS
2608 /*
2609 * Check for conflicts.
2610 * RC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2611 * R3: Simply resolve the conflict.
2612 */
2613 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2614 {
2615 Assert(pgmMapAreMappingsEnabled(pVM));
2616# ifndef IN_RING3
2617 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2618 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2619 return VERR_ADDRESS_CONFLICT;
2620
2621# else /* IN_RING3 */
2622 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2623 Assert(pMapping);
2624# if PGM_GST_TYPE == PGM_TYPE_32BIT
2625 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2626# elif PGM_GST_TYPE == PGM_TYPE_PAE
2627 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2628# else
2629 AssertFailed(); /* can't happen for amd64 */
2630# endif
2631 if (RT_FAILURE(rc))
2632 {
2633 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2634 return rc;
2635 }
 /* Re-read the shadow PDE after the conflict has been resolved. */
2636 PdeDst = *pPdeDst;
2637# endif /* IN_RING3 */
2638 }
2639# endif /* !PGM_WITHOUT_MAPPINGS */
2640 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2641
2642 /*
2643 * Sync the page directory entry.
2644 */
2645 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
 /* fPageTable: the guest PDE points to a 4KB page table (size bit clear or PSE inactive) rather than a big page. */
2646 const bool fPageTable = !PdeSrc.b.u1Size || !GST_IS_PSE_ACTIVE(pVCpu);
2647 if ( PdeSrc.n.u1Present
2648 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2649 {
2650 /*
2651 * Allocate & map the page table.
2652 */
2653 PSHWPT pPTDst;
2654 PPGMPOOLPAGE pShwPage;
2655 RTGCPHYS GCPhys;
2656 if (fPageTable)
2657 {
2658 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2659# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2660 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2661 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2662# endif
2663 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2664 }
2665 else
2666 {
2667 PGMPOOLACCESS enmAccess;
2668# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2669 const bool fNoExecute = PdeSrc.n.u1NoExecute && GST_IS_NX_ACTIVE(pVCpu);
2670# else
2671 const bool fNoExecute = false;
2672# endif
2673
2674 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2675# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2676 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2677 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2678# endif
2679 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2680 if (PdeSrc.n.u1User)
2681 {
2682 if (PdeSrc.n.u1Write)
2683 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2684 else
2685 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2686 }
2687 else
2688 {
2689 if (PdeSrc.n.u1Write)
2690 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2691 else
2692 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2693 }
2694 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, false /*fLockPage*/,
2695 &pShwPage);
2696 }
2697 if (rc == VINF_SUCCESS)
2698 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2699 else if (rc == VINF_PGM_CACHED_PAGE)
2700 {
2701 /*
2702 * The PT was cached, just hook it up.
2703 */
2704 if (fPageTable)
2705 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2706 else
2707 {
2708 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2709 /* (see explanation and assumptions further down.) */
2710 if ( !PdeSrc.b.u1Dirty
2711 && PdeSrc.b.u1Write)
2712 {
2713 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2714 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2715 PdeDst.b.u1Write = 0;
2716 }
2717 }
2718 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2719 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
 /* NOTE(review): this return, the VINF_PGM_SYNC_CR3 return and the assertion return below leave
  * without STAM_PROFILE_STOP, unlike the mapping-conflict returns above - confirm this is intended. */
2720 return VINF_SUCCESS;
2721 }
2722 else if (rc == VERR_PGM_POOL_FLUSHED)
2723 {
2724 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2725 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2726 return VINF_PGM_SYNC_CR3;
2727 }
2728 else
2729 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2730 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2731 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2732 * irrelevant at this point. */
2733 PdeDst.u &= X86_PDE_AVL_MASK;
2734 PdeDst.u |= pShwPage->Core.Key;
2735
2736 /*
2737 * Page directory has been accessed (this is a fault situation, remember).
2738 */
2739 /** @todo
2740 * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2741 * fault situation. What's more, the Trap0eHandler has already set the
2742 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2743 * might need setting the accessed flag.
2744 *
2745 * The best idea is to leave this change to the caller and add an
2746 * assertion that it's set already. */
2747 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2748 if (fPageTable)
2749 {
2750 /*
2751 * Page table - 4KB.
2752 *
2753 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2754 */
2755 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2756 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2757 PGSTPT pPTSrc;
2758 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2759 if (RT_SUCCESS(rc))
2760 {
2761 /*
2762 * Start by syncing the page directory entry so CSAM's TLB trick works.
2763 */
2764 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2765 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2766 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2767 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2768
2769 /*
2770 * Directory/page user or supervisor privilege: (same goes for read/write)
2771 *
2772 * Directory Page Combined
2773 * U/S U/S U/S
2774 * 0 0 0
2775 * 0 1 0
2776 * 1 0 0
2777 * 1 1 1
2778 *
2779 * Simple AND operation. Table listed for completeness.
2780 *
2781 */
2782 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT4K));
2783# ifdef PGM_SYNC_N_PAGES
 /* Sync a window of PTEs around the faulting entry: PGM_SYNC_NR_PAGES / 2 on either
  * side, clipped to the start and end of the shadow page table. */
2784 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2785 unsigned iPTDst = iPTBase;
2786 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2787 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2788 iPTDst = 0;
2789 else
2790 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2791# else /* !PGM_SYNC_N_PAGES */
2792 unsigned iPTDst = 0;
2793 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2794# endif /* !PGM_SYNC_N_PAGES */
 /* Guest address of the first PTE to sync: PD-aligned base of GCPtrPage plus the starting PT index. */
2795 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2796 | ((RTGCPTR)iPTDst << PAGE_SHIFT);
2797# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2798 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2799 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2800# else
2801 const unsigned offPTSrc = 0;
2802# endif
2803 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += PAGE_SIZE)
2804 {
2805 const unsigned iPTSrc = iPTDst + offPTSrc;
2806 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2807
2808 if (PteSrc.n.u1Present)
2809 {
2810# ifndef IN_RING0
2811 /*
2812 * Assuming kernel code will be marked as supervisor - and not as user level
2813 * and executed using a conforming code selector - And marked as readonly.
2814 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2815 */
2816 PPGMPAGE pPage;
2817 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2818 || !CSAMDoesPageNeedScanning(pVM, GCPtrCur)
2819 || ( (pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc)))
2820 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2821 )
2822# endif
2823 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2824 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2825 GCPtrCur,
2826 PteSrc.n.u1Present,
2827 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2828 PteSrc.n.u1User & PdeSrc.n.u1User,
2829 (uint64_t)PteSrc.u,
2830 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2831 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2832 }
2833 /* else: the page table was cleared by the pool */
2834 } /* for PTEs */
2835 }
2836 }
2837 else
2838 {
2839 /*
2840 * Big page - 2/4MB.
2841 *
2842 * We'll walk the ram range list in parallel and optimize lookups.
2843 * We will only sync one shadow page table at a time.
2844 */
2845 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT4M));
2846
2847 /**
2848 * @todo It might be more efficient to sync only a part of the 4MB
2849 * page (similar to what we do for 4KB PDs).
2850 */
2851
2852 /*
2853 * Start by syncing the page directory entry.
2854 */
2855 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2856 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2857
2858 /*
2859 * If the page is not flagged as dirty and is writable, then make it read-only
2860 * at PD level, so we can set the dirty bit when the page is modified.
2861 *
2862 * ASSUMES that page access handlers are implemented on page table entry level.
2863 * Thus we will first catch the dirty access and set PDE.D and restart. If
2864 * there is an access handler, we'll trap again and let it work on the problem.
2865 */
2866 /** @todo move the above stuff to a section in the PGM documentation. */
2867 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2868 if ( !PdeSrc.b.u1Dirty
2869 && PdeSrc.b.u1Write)
2870 {
2871 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2872 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2873 PdeDst.b.u1Write = 0;
2874 }
2875 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2876 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2877
2878 /*
2879 * Fill the shadow page table.
2880 */
2881 /* Get address and flags from the source PDE. */
2882 SHWPTE PteDstBase;
2883 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2884
2885 /* Loop thru the entries in the shadow PT. */
2886 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2887 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2888 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2889 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2890 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2891 unsigned iPTDst = 0;
 /* Stop filling as soon as VM_FF_PGM_NO_MEMORY becomes pending. */
2892 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2893 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2894 {
2895 if (pRam && GCPhys >= pRam->GCPhys)
2896 {
 /* GCPhys lies inside the current ram range: produce PTEs until the PT or the range ends. */
2897 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2898 do
2899 {
2900 /* Make shadow PTE. */
2901 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2902 SHWPTE PteDst;
2903
2904# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2905 /* Try to make the page writable if necessary. */
2906 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2907 && ( PGM_PAGE_IS_ZERO(pPage)
2908 || ( SHW_PTE_IS_RW(PteDstBase)
2909 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2910# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2911 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2912# endif
2913# ifdef VBOX_WITH_PAGE_SHARING
2914 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2915# endif
2916 && !PGM_PAGE_IS_BALLOONED(pPage))
2917 )
2918 )
2919 {
2920 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2921 AssertRCReturn(rc, rc);
2922 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2923 break;
2924 }
2925# endif
2926
2927 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2928 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2929 else if (PGM_PAGE_IS_BALLOONED(pPage))
2930 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2931# ifndef IN_RING0
2932 /*
2933 * Assuming kernel code will be marked as supervisor and not as user level and executed
2934 * using a conforming code selector. Don't check for readonly, as that implies the whole
2935 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2936 */
2937 else if ( !PdeSrc.n.u1User
2938 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2939 SHW_PTE_SET(PteDst, 0);
2940# endif
2941 else
2942 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2943
2944 /* Only map writable pages writable. */
2945 if ( SHW_PTE_IS_P_RW(PteDst)
2946 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2947 {
2948 /* Still applies to shared pages. */
2949 Assert(!PGM_PAGE_IS_ZERO(pPage));
2950 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2951 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2952 }
2953
2954 if (SHW_PTE_IS_P(PteDst))
2955 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2956
2957 /* commit it (not atomic, new table) */
2958 pPTDst->a[iPTDst] = PteDst;
2959 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2960 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2961 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2962
2963 /* advance */
2964 GCPhys += PAGE_SIZE;
2965 iHCPage++;
2966 iPTDst++;
2967 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2968 && GCPhys <= pRam->GCPhysLast);
2969
2970 /* Advance ram range list. */
2971 while (pRam && GCPhys > pRam->GCPhysLast)
2972 pRam = pRam->CTX_SUFF(pNext);
2973 }
2974 else if (pRam)
2975 {
 /* GCPhys is below the start of the next ram range: zero the PTEs up to that range. */
2976 Log(("Invalid pages at %RGp\n", GCPhys));
2977 do
2978 {
2979 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2980 GCPhys += PAGE_SIZE;
2981 iPTDst++;
2982 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2983 && GCPhys < pRam->GCPhys);
2984 }
2985 else
2986 {
 /* No ram range at or above GCPhys: zero all remaining PTEs. */
2987 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2988 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2989 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2990 }
2991 } /* while more PTEs */
2992 } /* 4KB / 4MB */
2993 }
2994 else
2995 AssertRelease(!PdeDst.n.u1Present);
2996
2997 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2998 if (RT_FAILURE(rc))
2999 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPTFailed));
3000 return rc;
3001
3002#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3003 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3004 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3005 && !defined(IN_RC)
3006
3007 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3008
3009 /*
3010 * Locate the shadow PDE and the pool page descriptor of the shadow page
 * directory containing it.
3011 */
3012 int rc = VINF_SUCCESS;
3013# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3014 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3015 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3016
3017 /* Fetch the pgm pool shadow descriptor. */
3018 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3019 Assert(pShwPde);
3020
3021# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3022 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3023 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3024 PX86PDPAE pPDDst;
3025 PSHWPDE pPdeDst;
3026
3027 /* Fetch the pgm pool shadow descriptor. */
3028 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3029 AssertRCSuccessReturn(rc, rc);
3030 Assert(pShwPde);
3031
3032 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3033 pPdeDst = &pPDDst->a[iPDDst];
3034
3035# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3036 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3037 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3038 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3039 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3040 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3041 AssertRCSuccessReturn(rc, rc);
3042 Assert(pPDDst);
3043 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3044
3045 /* Fetch the pgm pool shadow descriptor. */
3046 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3047 Assert(pShwPde);
3048
3049# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3050 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3051 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3052 PEPTPD pPDDst;
3053 PEPTPDPT pPdptDst;
3054
3055 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3056 if (rc != VINF_SUCCESS)
3057 {
3058 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3059 AssertRC(rc);
3060 return rc;
3061 }
3062 Assert(pPDDst);
3063 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3064
3065 /* Fetch the pgm pool shadow descriptor. */
3066 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3067 Assert(pShwPde);
3068# endif
3069 SHWPDE PdeDst = *pPdeDst;
3070
3071 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3072 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3073
3074# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3075 if (BTH_IS_NP_ACTIVE(pVM))
3076 {
3077 /* Check if we allocated a big page before for this 2 MB range. */
3078 PPGMPAGE pPage;
3079 rc = pgmPhysGetPageEx(pVM, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3080 if (RT_SUCCESS(rc))
3081 {
 /* HCPhys stays NIL_RTHCPHYS unless one of the three cases below yields a usable large page. */
3082 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3083 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3084 {
3085 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3086 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3087 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3088 }
3089 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3090 {
3091 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3092 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
3093 if (RT_SUCCESS(rc))
3094 {
3095 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3096 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3097 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3098 }
3099 }
3100 else if (PGMIsUsingLargePages(pVM))
3101 {
3102 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3103 if (RT_SUCCESS(rc))
3104 {
3105 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3106 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3107 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3108 }
3109 else
3110 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3111 }
3112
3113 if (HCPhys != NIL_RTHCPHYS)
3114 {
 /* Map the whole range with a single large-page shadow PDE and return without building a page table. */
3115 PdeDst.u &= X86_PDE_AVL_MASK;
3116 PdeDst.u |= HCPhys;
3117 PdeDst.n.u1Present = 1;
3118 PdeDst.n.u1Write = 1;
3119 PdeDst.b.u1Size = 1;
3120# if PGM_SHW_TYPE == PGM_TYPE_EPT
3121 PdeDst.n.u1Execute = 1;
3122 PdeDst.b.u1IgnorePAT = 1;
3123 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3124# else
3125 PdeDst.n.u1User = 1;
3126# endif
3127 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3128
3129 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3130 /* Add a reference to the first page only. */
3131 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3132
3133 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3134 return VINF_SUCCESS;
3135 }
3136 }
3137 }
3138# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3139
3140 /*
3141 * Allocate & map the page table.
3142 */
3143 PSHWPT pPTDst;
3144 PPGMPOOLPAGE pShwPage;
3145 RTGCPHYS GCPhys;
3146
3147 /* Virtual address = physical address */
3148 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3149 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3150
3151 if ( rc == VINF_SUCCESS
3152 || rc == VINF_PGM_CACHED_PAGE)
3153 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3154 else
3155 {
3156 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3157 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3158 }
3159
3160 if (rc == VINF_SUCCESS)
3161 {
3162 /* New page table; fully set it up. */
3163 Assert(pPTDst);
3164
3165 /* Mask away the page offset. */
3166 GCPtrPage &= ~((RTGCPTR)0xfff);
3167
3168 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3169 {
3170 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
3171
3172 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
 /* NOTE(review): the log prefix below reads "SyncPage" although this is SyncPT. */
3173 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
3174 GCPtrCurPage,
3175 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
3176 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
3177
3178 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
3179 break;
3180 }
3181 }
3182 else
3183 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3184
3185 /* Save the new PDE. */
3186 PdeDst.u &= X86_PDE_AVL_MASK;
3187 PdeDst.u |= pShwPage->Core.Key;
3188 PdeDst.n.u1Present = 1;
3189 PdeDst.n.u1Write = 1;
3190# if PGM_SHW_TYPE == PGM_TYPE_EPT
3191 PdeDst.n.u1Execute = 1;
3192# else
3193 PdeDst.n.u1User = 1;
3194 PdeDst.n.u1Accessed = 1;
3195# endif
3196 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3197
3198 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3199 if (RT_FAILURE(rc))
3200 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPTFailed));
3201 return rc;
3202
3203#else
3204 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3205 return VERR_INTERNAL_ERROR;
3206#endif
3207}
3208
3209
3210
3211/**
3212 * Prefetch a page/set of pages.
3213 *
3214 * Typically used to sync commonly used pages before entering raw mode
3215 * after a CR3 reload.
3216 *
 * Nothing is synced unless the guest PDE is both present and accessed. The
 * PGM lock is taken here around the shadow PDE lookup and the SyncPT/SyncPage
 * call. For nested/EPT shadow paging this is a no-op returning VINF_SUCCESS.
 *
3217 * @returns VBox status code.
3218 * @param pVCpu The VMCPU handle.
3219 * @param GCPtrPage Page to prefetch.
3220 */
3221PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3222{
3223#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3224 || PGM_GST_TYPE == PGM_TYPE_REAL \
3225 || PGM_GST_TYPE == PGM_TYPE_PROT \
3226 || PGM_GST_TYPE == PGM_TYPE_PAE \
3227 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3228 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3229 && PGM_SHW_TYPE != PGM_TYPE_EPT
3230
3231 /*
3232 * Check that all Guest levels thru the PDE are present, getting the
3233 * PD and PDE in the process.
3234 */
3235 int rc = VINF_SUCCESS;
3236# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3237# if PGM_GST_TYPE == PGM_TYPE_32BIT
3238 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3239 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3240# elif PGM_GST_TYPE == PGM_TYPE_PAE
3241 unsigned iPDSrc;
3242 X86PDPE PdpeSrc;
3243 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3244 if (!pPDSrc)
3245 return VINF_SUCCESS; /* not present */
3246# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3247 unsigned iPDSrc;
3248 PX86PML4E pPml4eSrc;
3249 X86PDPE PdpeSrc;
3250 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3251 if (!pPDSrc)
3252 return VINF_SUCCESS; /* not present */
3253# endif
3254 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3255# else
3256 PGSTPD pPDSrc = NULL;
3257 const unsigned iPDSrc = 0;
3258 GSTPDE PdeSrc;
3259
3260 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3261 PdeSrc.n.u1Present = 1;
3262 PdeSrc.n.u1Write = 1;
3263 PdeSrc.n.u1Accessed = 1;
3264 PdeSrc.n.u1User = 1;
3265# endif
3266
 /* Only prefetch when the guest PDE is present and already marked accessed; otherwise rc stays VINF_SUCCESS. */
3267 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3268 {
3269 PVM pVM = pVCpu->CTX_SUFF(pVM);
 /* Every exit from this scope must release the PGM lock taken here. */
3270 pgmLock(pVM);
3271
3272# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3273 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3274# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3275 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3276 PX86PDPAE pPDDst;
3277 X86PDEPAE PdeDst;
3278# if PGM_GST_TYPE != PGM_TYPE_PAE
3279 X86PDPE PdpeSrc;
3280
3281 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3282 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3283# endif
3284 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3285 if (rc != VINF_SUCCESS)
3286 {
3287 pgmUnlock(pVM);
3288 AssertRC(rc);
3289 return rc;
3290 }
3291 Assert(pPDDst);
3292 PdeDst = pPDDst->a[iPDDst];
3293
3294# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3295 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3296 PX86PDPAE pPDDst;
3297 X86PDEPAE PdeDst;
3298
3299# if PGM_GST_TYPE == PGM_TYPE_PROT
3300 /* AMD-V nested paging */
3301 X86PML4E Pml4eSrc;
3302 X86PDPE PdpeSrc;
3303 PX86PML4E pPml4eSrc = &Pml4eSrc;
3304
3305 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3306 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3307 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3308# endif
3309
3310 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3311 if (rc != VINF_SUCCESS)
3312 {
3313 pgmUnlock(pVM);
3314 AssertRC(rc);
3315 return rc;
3316 }
3317 Assert(pPDDst);
3318 PdeDst = pPDDst->a[iPDDst];
3319# endif
 /* Shadow PDEs flagged PGM_PDFLAGS_MAPPING are left untouched. */
3320 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3321 {
3322 if (!PdeDst.n.u1Present)
3323 {
3324 /** @todo r=bird: This guy will set the A bit on the PDE,
3325 * probably harmless. */
3326 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3327 }
3328 else
3329 {
3330 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3331 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3332 * makes no sense to prefetch more than one page.
3333 */
3334 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
 /* Collapse any informational success status from SyncPage into plain VINF_SUCCESS. */
3335 if (RT_SUCCESS(rc))
3336 rc = VINF_SUCCESS;
3337 }
3338 }
3339 pgmUnlock(pVM);
3340 }
3341 return rc;
3342
3343#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3344 return VINF_SUCCESS; /* ignore */
3345#else
3346 AssertCompile(0);
3347#endif
3348}
3349
3350
3351
3352
3353/**
3354 * Syncs a page during a PGMVerifyAccess() call.
3355 *
3356 * @returns VBox status code (informational included).
3357 * @param pVCpu The VMCPU handle.
3358 * @param GCPtrPage The address of the page to sync.
3359 * @param fPage The effective guest page flags.
3360 * @param uErr The trap error code.
3361 * @remarks This will normally never be called on invalid guest page
3362 * translation entries.
3363 */
3364PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3365{
3366 PVM pVM = pVCpu->CTX_SUFF(pVM);
3367
3368 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3369
3370 Assert(!pVM->pgm.s.fNestedPaging);
3371#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3372 || PGM_GST_TYPE == PGM_TYPE_REAL \
3373 || PGM_GST_TYPE == PGM_TYPE_PROT \
3374 || PGM_GST_TYPE == PGM_TYPE_PAE \
3375 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3376 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3377 && PGM_SHW_TYPE != PGM_TYPE_EPT
3378
3379# ifndef IN_RING0
3380 if (!(fPage & X86_PTE_US))
3381 {
3382 /*
3383 * Mark this page as safe.
3384 */
3385 /** @todo not correct for pages that contain both code and data!! */
3386 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3387 CSAMMarkPage(pVM, GCPtrPage, true);
3388 }
3389# endif
3390
3391 /*
3392 * Get guest PD and index.
3393 */
3394 /** @todo Performance: We've done all this a jiffy ago in the
3395 * PGMGstGetPage call. */
3396# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3397# if PGM_GST_TYPE == PGM_TYPE_32BIT
3398 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3399 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3400
3401# elif PGM_GST_TYPE == PGM_TYPE_PAE
3402 unsigned iPDSrc = 0;
3403 X86PDPE PdpeSrc;
3404 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3405 if (RT_UNLIKELY(!pPDSrc))
3406 {
3407 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3408 return VINF_EM_RAW_GUEST_TRAP;
3409 }
3410
3411# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3412 unsigned iPDSrc = 0; /* shut up gcc */
3413 PX86PML4E pPml4eSrc = NULL; /* ditto */
3414 X86PDPE PdpeSrc;
3415 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3416 if (RT_UNLIKELY(!pPDSrc))
3417 {
3418 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3419 return VINF_EM_RAW_GUEST_TRAP;
3420 }
3421# endif
3422
3423# else /* !PGM_WITH_PAGING */
3424 PGSTPD pPDSrc = NULL;
3425 const unsigned iPDSrc = 0;
3426# endif /* !PGM_WITH_PAGING */
3427 int rc = VINF_SUCCESS;
3428
3429 pgmLock(pVM);
3430
3431 /*
3432 * First check if the shadow pd is present.
3433 */
3434# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3435 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3436
3437# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3438 PX86PDEPAE pPdeDst;
3439 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3440 PX86PDPAE pPDDst;
3441# if PGM_GST_TYPE != PGM_TYPE_PAE
3442 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3443 X86PDPE PdpeSrc;
3444 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3445# endif
3446 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3447 if (rc != VINF_SUCCESS)
3448 {
3449 pgmUnlock(pVM);
3450 AssertRC(rc);
3451 return rc;
3452 }
3453 Assert(pPDDst);
3454 pPdeDst = &pPDDst->a[iPDDst];
3455
3456# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3457 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3458 PX86PDPAE pPDDst;
3459 PX86PDEPAE pPdeDst;
3460
3461# if PGM_GST_TYPE == PGM_TYPE_PROT
3462 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3463 X86PML4E Pml4eSrc;
3464 X86PDPE PdpeSrc;
3465 PX86PML4E pPml4eSrc = &Pml4eSrc;
3466 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3467 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3468# endif
3469
3470 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3471 if (rc != VINF_SUCCESS)
3472 {
3473 pgmUnlock(pVM);
3474 AssertRC(rc);
3475 return rc;
3476 }
3477 Assert(pPDDst);
3478 pPdeDst = &pPDDst->a[iPDDst];
3479# endif
3480
3481 if (!pPdeDst->n.u1Present)
3482 {
3483 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3484 if (rc != VINF_SUCCESS)
3485 {
3486 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3487 pgmUnlock(pVM);
3488 AssertRC(rc);
3489 return rc;
3490 }
3491 }
3492
3493# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3494 /* Check for dirty bit fault */
3495 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3496 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3497 Log(("PGMVerifyAccess: success (dirty)\n"));
3498 else
3499# endif
3500 {
3501# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3502 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3503# else
3504 GSTPDE PdeSrc;
3505 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3506 PdeSrc.n.u1Present = 1;
3507 PdeSrc.n.u1Write = 1;
3508 PdeSrc.n.u1Accessed = 1;
3509 PdeSrc.n.u1User = 1;
3510# endif
3511
3512 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3513 if (uErr & X86_TRAP_PF_US)
3514 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUser));
3515 else /* supervisor */
3516 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3517
3518 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3519 if (RT_SUCCESS(rc))
3520 {
3521 /* Page was successfully synced */
3522 Log2(("PGMVerifyAccess: success (sync)\n"));
3523 rc = VINF_SUCCESS;
3524 }
3525 else
3526 {
3527 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3528 rc = VINF_EM_RAW_GUEST_TRAP;
3529 }
3530 }
3531 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3532 pgmUnlock(pVM);
3533 return rc;
3534
3535#else /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3536
3537 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3538 return VERR_INTERNAL_ERROR;
3539#endif /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3540}
3541
3542
3543/**
3544 * Syncs the paging hierarchy starting at CR3.
3545 *
3546 * @returns VBox status code, no specials.
3547 * @param pVCpu The VMCPU handle.
3548 * @param cr0 Guest context CR0 register
3549 * @param cr3 Guest context CR3 register
3550 * @param cr4 Guest context CR4 register
3551 * @param fGlobal Including global page directories or not
3552 */
3553PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3554{
3555 PVM pVM = pVCpu->CTX_SUFF(pVM);
3556
3557 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3558
3559#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3560
3561 pgmLock(pVM);
3562
3563# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3564 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3565 if (pPool->cDirtyPages)
3566 pgmPoolResetDirtyPages(pVM);
3567# endif
3568
3569 /*
3570 * Update page access handlers.
3571 * The virtual are always flushed, while the physical are only on demand.
3572 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3573 * have to look into that later because it will have a bad influence on the performance.
3574 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3575 * bird: Yes, but that won't work for aliases.
3576 */
3577 /** @todo this MUST go away. See #1557. */
3578 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncCR3Handlers), h);
3579 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3580 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncCR3Handlers), h);
3581 pgmUnlock(pVM);
3582#endif /* !NESTED && !EPT */
3583
3584#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3585 /*
3586 * Nested / EPT - almost no work.
3587 */
3588 Assert(!pgmMapAreMappingsEnabled(pVM));
3589 return VINF_SUCCESS;
3590
3591#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3592 /*
3593 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3594 * out the shadow parts when the guest modifies its tables.
3595 */
3596 Assert(!pgmMapAreMappingsEnabled(pVM));
3597 return VINF_SUCCESS;
3598
3599#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3600
3601# ifndef PGM_WITHOUT_MAPPINGS
3602 /*
3603 * Check for and resolve conflicts with our guest mappings if they
3604 * are enabled and not fixed.
3605 */
3606 if (pgmMapAreMappingsFloating(pVM))
3607 {
3608 int rc = pgmMapResolveConflicts(pVM);
3609 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3610 if (rc == VINF_PGM_SYNC_CR3)
3611 {
3612 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3613 return VINF_PGM_SYNC_CR3;
3614 }
3615 }
3616# else
3617 Assert(!pgmMapAreMappingsEnabled(pVM));
3618# endif
3619 return VINF_SUCCESS;
3620#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3621}
3622
3623
3624
3625
3626#ifdef VBOX_STRICT
3627# ifdef IN_RC
3628# undef AssertMsgFailed
3629# define AssertMsgFailed Log
3630# endif
3631
3632/**
3633 * Checks that the shadow page table is in sync with the guest one.
3634 *
3635 * @returns The number of errors.
3636 * @param pVM The virtual machine.
3637 * @param pVCpu The VMCPU handle.
3638 * @param cr3 Guest context CR3 register
3639 * @param cr4 Guest context CR4 register
3640 * @param GCPtr Where to start. Defaults to 0.
3641 * @param cb How much to check. Defaults to everything.
3642 */
3643PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3644{
3645#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3646 return 0;
3647#else
3648 unsigned cErrors = 0;
3649 PVM pVM = pVCpu->CTX_SUFF(pVM);
3650 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3651
3652#if PGM_GST_TYPE == PGM_TYPE_PAE
3653 /** @todo currently broken; crashes below somewhere */
3654 AssertFailed();
3655#endif
3656
3657#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3658 || PGM_GST_TYPE == PGM_TYPE_PAE \
3659 || PGM_GST_TYPE == PGM_TYPE_AMD64
3660
3661 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3662 PPGMCPU pPGM = &pVCpu->pgm.s;
3663 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3664 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3665# ifndef IN_RING0
3666 RTHCPHYS HCPhys; /* general usage. */
3667# endif
3668 int rc;
3669
3670 /*
3671 * Check that the Guest CR3 and all its mappings are correct.
3672 */
3673 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3674 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3675 false);
3676# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3677# if PGM_GST_TYPE == PGM_TYPE_32BIT
3678 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3679# else
3680 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3681# endif
3682 AssertRCReturn(rc, 1);
3683 HCPhys = NIL_RTHCPHYS;
3684 rc = pgmRamGCPhys2HCPhys(pVM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3685 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3686# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3687 pgmGstGet32bitPDPtr(pVCpu);
3688 RTGCPHYS GCPhys;
3689 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3690 AssertRCReturn(rc, 1);
3691 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3692# endif
3693# endif /* !IN_RING0 */
3694
3695 /*
3696 * Get and check the Shadow CR3.
3697 */
3698# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3699 unsigned cPDEs = X86_PG_ENTRIES;
3700 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3701# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3702# if PGM_GST_TYPE == PGM_TYPE_32BIT
3703 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3704# else
3705 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3706# endif
3707 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3708# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3709 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3710 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3711# endif
3712 if (cb != ~(RTGCPTR)0)
3713 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3714
3715/** @todo call the other two PGMAssert*() functions. */
3716
3717# if PGM_GST_TYPE == PGM_TYPE_AMD64
3718 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3719
3720 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3721 {
3722 PPGMPOOLPAGE pShwPdpt = NULL;
3723 PX86PML4E pPml4eSrc;
3724 PX86PML4E pPml4eDst;
3725 RTGCPHYS GCPhysPdptSrc;
3726
3727 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3728 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3729
3730 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3731 if (!pPml4eDst->n.u1Present)
3732 {
3733 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3734 continue;
3735 }
3736
3737 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3738 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK;
3739
3740 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3741 {
3742 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3743 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3744 cErrors++;
3745 continue;
3746 }
3747
3748 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3749 {
3750 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3751 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3752 cErrors++;
3753 continue;
3754 }
3755
3756 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3757 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3758 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3759 {
3760 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3761 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3762 cErrors++;
3763 continue;
3764 }
3765# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3766 {
3767# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3768
3769# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3770 /*
3771 * Check the PDPTEs too.
3772 */
3773 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3774
3775 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3776 {
3777 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3778 PPGMPOOLPAGE pShwPde = NULL;
3779 PX86PDPE pPdpeDst;
3780 RTGCPHYS GCPhysPdeSrc;
3781 X86PDPE PdpeSrc;
3782 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3783# if PGM_GST_TYPE == PGM_TYPE_PAE
3784 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3785 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3786# else
3787 PX86PML4E pPml4eSrcIgn;
3788 PX86PDPT pPdptDst;
3789 PX86PDPAE pPDDst;
3790 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3791
3792 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3793 if (rc != VINF_SUCCESS)
3794 {
3795 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3796 GCPtr += 512 * _2M;
3797 continue; /* next PDPTE */
3798 }
3799 Assert(pPDDst);
3800# endif
3801 Assert(iPDSrc == 0);
3802
3803 pPdpeDst = &pPdptDst->a[iPdpt];
3804
3805 if (!pPdpeDst->n.u1Present)
3806 {
3807 GCPtr += 512 * _2M;
3808 continue; /* next PDPTE */
3809 }
3810
3811 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3812 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3813
3814 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3815 {
3816 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3817 GCPtr += 512 * _2M;
3818 cErrors++;
3819 continue;
3820 }
3821
3822 if (GCPhysPdeSrc != pShwPde->GCPhys)
3823 {
3824# if PGM_GST_TYPE == PGM_TYPE_AMD64
3825 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3826# else
3827 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3828# endif
3829 GCPtr += 512 * _2M;
3830 cErrors++;
3831 continue;
3832 }
3833
3834# if PGM_GST_TYPE == PGM_TYPE_AMD64
3835 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3836 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3837 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3838 {
3839 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3840 GCPtr += 512 * _2M;
3841 cErrors++;
3842 continue;
3843 }
3844# endif
3845
3846# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3847 {
3848# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3849# if PGM_GST_TYPE == PGM_TYPE_32BIT
3850 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3851# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3852 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3853# endif
3854# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3855 /*
3856 * Iterate the shadow page directory.
3857 */
3858 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3859 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3860
3861 for (;
3862 iPDDst < cPDEs;
3863 iPDDst++, GCPtr += cIncrement)
3864 {
3865# if PGM_SHW_TYPE == PGM_TYPE_PAE
3866 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3867# else
3868 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3869# endif
3870 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3871 {
3872 Assert(pgmMapAreMappingsEnabled(pVM));
3873 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3874 {
3875 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3876 cErrors++;
3877 continue;
3878 }
3879 }
3880 else if ( (PdeDst.u & X86_PDE_P)
3881 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3882 )
3883 {
3884 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3885 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3886 if (!pPoolPage)
3887 {
3888 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3889 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3890 cErrors++;
3891 continue;
3892 }
3893 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3894
3895 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3896 {
3897 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3898 GCPtr, (uint64_t)PdeDst.u));
3899 cErrors++;
3900 }
3901
3902 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3903 {
3904 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3905 GCPtr, (uint64_t)PdeDst.u));
3906 cErrors++;
3907 }
3908
3909 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3910 if (!PdeSrc.n.u1Present)
3911 {
3912 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3913 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3914 cErrors++;
3915 continue;
3916 }
3917
3918 if ( !PdeSrc.b.u1Size
3919 || !fBigPagesSupported)
3920 {
3921 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3922# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3923 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3924# endif
3925 }
3926 else
3927 {
3928# if PGM_GST_TYPE == PGM_TYPE_32BIT
3929 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3930 {
3931 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3932 GCPtr, (uint64_t)PdeSrc.u));
3933 cErrors++;
3934 continue;
3935 }
3936# endif
3937 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3938# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3939 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3940# endif
3941 }
3942
3943 if ( pPoolPage->enmKind
3944 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3945 {
3946 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3947 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3948 cErrors++;
3949 }
3950
3951 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3952 if (!pPhysPage)
3953 {
3954 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3955 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3956 cErrors++;
3957 continue;
3958 }
3959
3960 if (GCPhysGst != pPoolPage->GCPhys)
3961 {
3962 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3963 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3964 cErrors++;
3965 continue;
3966 }
3967
3968 if ( !PdeSrc.b.u1Size
3969 || !fBigPagesSupported)
3970 {
3971 /*
3972 * Page Table.
3973 */
3974 const GSTPT *pPTSrc;
3975 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3976 if (RT_FAILURE(rc))
3977 {
3978 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3979 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3980 cErrors++;
3981 continue;
3982 }
3983 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3984 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3985 {
3986 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3987 // (This problem will go away when/if we shadow multiple CR3s.)
3988 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3989 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3990 cErrors++;
3991 continue;
3992 }
3993 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3994 {
3995 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3996 GCPtr, (uint64_t)PdeDst.u));
3997 cErrors++;
3998 continue;
3999 }
4000
4001 /* iterate the page table. */
4002# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4003 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4004 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4005# else
4006 const unsigned offPTSrc = 0;
4007# endif
4008 for (unsigned iPT = 0, off = 0;
4009 iPT < RT_ELEMENTS(pPTDst->a);
4010 iPT++, off += PAGE_SIZE)
4011 {
4012 const SHWPTE PteDst = pPTDst->a[iPT];
4013
4014 /* skip not-present and dirty tracked entries. */
4015 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4016 continue;
4017 Assert(SHW_PTE_IS_P(PteDst));
4018
4019 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4020 if (!PteSrc.n.u1Present)
4021 {
4022# ifdef IN_RING3
4023 PGMAssertHandlerAndFlagsInSync(pVM);
4024 DBGFR3PagingDumpEx(pVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
4025 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
4026 0, 0, UINT64_MAX, 99, NULL);
4027# endif
4028 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4029 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4030 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc)*sizeof(PteSrc)));
4031 cErrors++;
4032 continue;
4033 }
4034
4035 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4036# if 1 /** @todo sync accessed bit properly... */
4037 fIgnoreFlags |= X86_PTE_A;
4038# endif
4039
4040 /* match the physical addresses */
4041 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4042 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
4043
4044# ifdef IN_RING3
4045 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4046 if (RT_FAILURE(rc))
4047 {
4048 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4049 {
4050 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4051 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4052 cErrors++;
4053 continue;
4054 }
4055 }
4056 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4057 {
4058 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4059 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4060 cErrors++;
4061 continue;
4062 }
4063# endif
4064
4065 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4066 if (!pPhysPage)
4067 {
4068# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4069 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4070 {
4071 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4072 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4073 cErrors++;
4074 continue;
4075 }
4076# endif
4077 if (SHW_PTE_IS_RW(PteDst))
4078 {
4079 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4080 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4081 cErrors++;
4082 }
4083 fIgnoreFlags |= X86_PTE_RW;
4084 }
4085 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4086 {
4087 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4088 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4089 cErrors++;
4090 continue;
4091 }
4092
4093 /* flags */
4094 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4095 {
4096 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4097 {
4098 if (SHW_PTE_IS_RW(PteDst))
4099 {
4100 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4101 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4102 cErrors++;
4103 continue;
4104 }
4105 fIgnoreFlags |= X86_PTE_RW;
4106 }
4107 else
4108 {
4109 if ( SHW_PTE_IS_P(PteDst)
4110# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4111 && !PGM_PAGE_IS_MMIO(pPhysPage)
4112# endif
4113 )
4114 {
4115 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4116 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4117 cErrors++;
4118 continue;
4119 }
4120 fIgnoreFlags |= X86_PTE_P;
4121 }
4122 }
4123 else
4124 {
4125 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4126 {
4127 if (SHW_PTE_IS_RW(PteDst))
4128 {
4129 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4130 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4131 cErrors++;
4132 continue;
4133 }
4134 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
4135 {
4136 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4137 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4138 cErrors++;
4139 continue;
4140 }
4141 if (SHW_PTE_IS_D(PteDst))
4142 {
4143 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4144 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4145 cErrors++;
4146 }
4147# if 0 /** @todo sync access bit properly... */
4148 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4149 {
4150 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4151 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4152 cErrors++;
4153 }
4154 fIgnoreFlags |= X86_PTE_RW;
4155# else
4156 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4157# endif
4158 }
4159 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4160 {
4161 /* access bit emulation (not implemented). */
4162 if (PteSrc.n.u1Accessed || SHW_PTE_IS_P(PteDst))
4163 {
4164 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4165 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4166 cErrors++;
4167 continue;
4168 }
4169 if (!SHW_PTE_IS_A(PteDst))
4170 {
4171 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4172 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4173 cErrors++;
4174 }
4175 fIgnoreFlags |= X86_PTE_P;
4176 }
4177# ifdef DEBUG_sandervl
4178 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4179# endif
4180 }
4181
4182 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4183 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4184 )
4185 {
4186 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4187 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4188 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4189 cErrors++;
4190 continue;
4191 }
4192 } /* foreach PTE */
4193 }
4194 else
4195 {
4196 /*
4197 * Big Page.
4198 */
4199 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4200 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4201 {
4202 if (PdeDst.n.u1Write)
4203 {
4204 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4205 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4206 cErrors++;
4207 continue;
4208 }
4209 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4210 {
4211 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4212 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4213 cErrors++;
4214 continue;
4215 }
4216# if 0 /** @todo sync access bit properly... */
4217 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4218 {
4219 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4220 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4221 cErrors++;
4222 }
4223 fIgnoreFlags |= X86_PTE_RW;
4224# else
4225 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4226# endif
4227 }
4228 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4229 {
4230 /* access bit emulation (not implemented). */
4231 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4232 {
4233 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4234 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4235 cErrors++;
4236 continue;
4237 }
4238 if (!PdeDst.n.u1Accessed)
4239 {
4240 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4241 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4242 cErrors++;
4243 }
4244 fIgnoreFlags |= X86_PTE_P;
4245 }
4246
4247 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4248 {
4249 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4250 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4251 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4252 cErrors++;
4253 }
4254
4255 /* iterate the page table. */
4256 for (unsigned iPT = 0, off = 0;
4257 iPT < RT_ELEMENTS(pPTDst->a);
4258 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4259 {
4260 const SHWPTE PteDst = pPTDst->a[iPT];
4261
4262 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4263 {
4264 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4265 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4266 cErrors++;
4267 }
4268
4269 /* skip not-present entries. */
4270 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4271 continue;
4272
4273 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4274
4275 /* match the physical addresses */
4276 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4277
4278# ifdef IN_RING3
4279 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4280 if (RT_FAILURE(rc))
4281 {
4282 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4283 {
4284 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4285 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4286 cErrors++;
4287 }
4288 }
4289 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4290 {
4291 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4292 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4293 cErrors++;
4294 continue;
4295 }
4296# endif
4297 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4298 if (!pPhysPage)
4299 {
4300# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4301 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4302 {
4303 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4304 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4305 cErrors++;
4306 continue;
4307 }
4308# endif
4309 if (SHW_PTE_IS_RW(PteDst))
4310 {
4311 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4312 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4313 cErrors++;
4314 }
4315 fIgnoreFlags |= X86_PTE_RW;
4316 }
4317 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4318 {
4319 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4320 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4321 cErrors++;
4322 continue;
4323 }
4324
4325 /* flags */
4326 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4327 {
4328 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4329 {
4330 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4331 {
4332 if (SHW_PTE_IS_RW(PteDst))
4333 {
4334 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4335 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4336 cErrors++;
4337 continue;
4338 }
4339 fIgnoreFlags |= X86_PTE_RW;
4340 }
4341 }
4342 else
4343 {
4344 if ( SHW_PTE_IS_P(PteDst)
4345# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4346 && !PGM_PAGE_IS_MMIO(pPhysPage)
4347# endif
4348 )
4349 {
4350 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4351 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4352 cErrors++;
4353 continue;
4354 }
4355 fIgnoreFlags |= X86_PTE_P;
4356 }
4357 }
4358
4359 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4360 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4361 )
4362 {
4363 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4364 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4365 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4366 cErrors++;
4367 continue;
4368 }
4369 } /* for each PTE */
4370 }
4371 }
4372 /* not present */
4373
4374 } /* for each PDE */
4375
4376 } /* for each PDPTE */
4377
4378 } /* for each PML4E */
4379
4380# ifdef DEBUG
4381 if (cErrors)
4382 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4383# endif
4384
4385#endif /* GST == 32BIT, PAE or AMD64 */
4386 return cErrors;
4387
4388#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4389}
4390#endif /* VBOX_STRICT */
4391
4392
4393/**
4394 * Sets up the CR3 for shadow paging
4395 *
4396 * @returns Strict VBox status code.
4397 * @retval VINF_SUCCESS.
4398 *
4399 * @param pVCpu The VMCPU handle.
4400 * @param GCPhysCR3 The physical address in the CR3 register.
4401 */
4402PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4403{
4404 PVM pVM = pVCpu->CTX_SUFF(pVM);
4405
4406 /* Update guest paging info. */
4407#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4408 || PGM_GST_TYPE == PGM_TYPE_PAE \
4409 || PGM_GST_TYPE == PGM_TYPE_AMD64
4410
4411 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4412
4413 /*
4414 * Map the page CR3 points at.
4415 */
4416 RTHCPTR HCPtrGuestCR3;
4417 RTHCPHYS HCPhysGuestCR3;
4418 pgmLock(pVM);
4419 PPGMPAGE pPageCR3 = pgmPhysGetPage(pVM, GCPhysCR3);
4420 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4421 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4422 /** @todo this needs some reworking wrt. locking? */
4423# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4424 HCPtrGuestCR3 = NIL_RTHCPTR;
4425 int rc = VINF_SUCCESS;
4426# else
4427 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4428# endif
4429 pgmUnlock(pVM);
4430 if (RT_SUCCESS(rc))
4431 {
4432 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4433 if (RT_SUCCESS(rc))
4434 {
4435# ifdef IN_RC
4436 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4437# endif
4438# if PGM_GST_TYPE == PGM_TYPE_32BIT
4439 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4440# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4441 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4442# endif
4443 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4444
4445# elif PGM_GST_TYPE == PGM_TYPE_PAE
4446 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4447 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4448# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4449 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4450# endif
4451 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4452 LogFlow(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4453
4454 /*
4455 * Map the 4 PDs too.
4456 */
4457 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
4458 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4459 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4460 {
4461 if (pGuestPDPT->a[i].n.u1Present)
4462 {
4463 RTHCPTR HCPtr;
4464 RTHCPHYS HCPhys;
4465 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4466 pgmLock(pVM);
4467 PPGMPAGE pPage = pgmPhysGetPage(pVM, GCPhys);
4468 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4469 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4470# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4471 HCPtr = NIL_RTHCPTR;
4472 int rc2 = VINF_SUCCESS;
4473# else
4474 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4475# endif
4476 pgmUnlock(pVM);
4477 if (RT_SUCCESS(rc2))
4478 {
4479 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4480 AssertRCReturn(rc, rc);
4481
4482 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4483# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4484 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4485# endif
4486 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4487 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4488# ifdef IN_RC
4489 PGM_INVL_PG(pVCpu, GCPtr);
4490# endif
4491 continue;
4492 }
4493 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4494 }
4495
4496 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4497# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4498 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4499# endif
4500 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4501 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4502# ifdef IN_RC
4503 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4504# endif
4505 }
4506
4507# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4508 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4509# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4510 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4511# endif
4512# endif
4513 }
4514 else
4515 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4516 }
4517 else
4518 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4519
4520#else /* prot/real stub */
4521 int rc = VINF_SUCCESS;
4522#endif
4523
4524 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4525# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4526 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4527 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4528 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4529 && PGM_GST_TYPE != PGM_TYPE_PROT))
4530
4531 Assert(!pVM->pgm.s.fNestedPaging);
4532
4533 /*
4534 * Update the shadow root page as well since that's not fixed.
4535 */
4536 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4537 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4538 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4539 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4540 PPGMPOOLPAGE pNewShwPageCR3;
4541
4542 pgmLock(pVM);
4543
4544# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4545 if (pPool->cDirtyPages)
4546 pgmPoolResetDirtyPages(pVM);
4547# endif
4548
4549 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4550 rc = pgmPoolAllocEx(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, SHW_POOL_ROOT_IDX,
4551 GCPhysCR3 >> PAGE_SHIFT, true /*fLockPage*/, &pNewShwPageCR3);
4552 AssertFatalRC(rc);
4553 rc = VINF_SUCCESS;
4554
4555# ifdef IN_RC
4556 /*
4557 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4558 * state will be inconsistent! Flush important things now while
4559 * we still can and then make sure there are no ring-3 calls.
4560 */
4561 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4562 VMMRZCallRing3Disable(pVCpu);
4563# endif
4564
4565 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4566 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4567 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4568# ifdef IN_RING0
4569 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4570 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4571# elif defined(IN_RC)
4572 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4573 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4574# else
4575 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4576 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4577# endif
4578
4579# ifndef PGM_WITHOUT_MAPPINGS
4580 /*
4581 * Apply all hypervisor mappings to the new CR3.
4582 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4583 * make sure we check for conflicts in the new CR3 root.
4584 */
4585# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4586 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4587# endif
4588 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4589 AssertRCReturn(rc, rc);
4590# endif
4591
4592 /* Set the current hypervisor CR3. */
4593 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4594 SELMShadowCR3Changed(pVM, pVCpu);
4595
4596# ifdef IN_RC
4597 /* NOTE: The state is consistent again. */
4598 VMMRZCallRing3Enable(pVCpu);
4599# endif
4600
4601 /* Clean up the old CR3 root. */
4602 if ( pOldShwPageCR3
4603 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4604 {
4605 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4606# ifndef PGM_WITHOUT_MAPPINGS
4607 /* Remove the hypervisor mappings from the shadow page table. */
4608 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4609# endif
4610 /* Mark the page as unlocked; allow flushing again. */
4611 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4612
4613 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4614 }
4615 pgmUnlock(pVM);
4616# endif
4617
4618 return rc;
4619}
4620
4621/**
4622 * Unmaps the shadow CR3.
4623 *
4624 * @returns VBox status, no specials.
4625 * @param pVCpu The VMCPU handle.
4626 */
4627PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4628{
4629 LogFlow(("UnmapCR3\n"));
4630
4631 int rc = VINF_SUCCESS;
4632 PVM pVM = pVCpu->CTX_SUFF(pVM);
4633
4634 /*
4635 * Update guest paging info.
4636 */
4637#if PGM_GST_TYPE == PGM_TYPE_32BIT
4638 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4639# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4640 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4641# endif
4642 pVCpu->pgm.s.pGst32BitPdRC = 0;
4643
4644#elif PGM_GST_TYPE == PGM_TYPE_PAE
4645 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4646# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4647 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4648# endif
4649 pVCpu->pgm.s.pGstPaePdptRC = 0;
4650 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4651 {
4652 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4653# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4654 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4655# endif
4656 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4657 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4658 }
4659
4660#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4661 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4662# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4663 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4664# endif
4665
4666#else /* prot/real mode stub */
4667 /* nothing to do */
4668#endif
4669
4670#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4671 /*
4672 * Update shadow paging info.
4673 */
4674# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4675 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4676 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4677
4678# if PGM_GST_TYPE != PGM_TYPE_REAL
4679 Assert(!pVM->pgm.s.fNestedPaging);
4680# endif
4681
4682 pgmLock(pVM);
4683
4684# ifndef PGM_WITHOUT_MAPPINGS
4685 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4686 /* Remove the hypervisor mappings from the shadow page table. */
4687 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4688# endif
4689
4690 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4691 {
4692 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4693
4694 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4695
4696# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4697 if (pPool->cDirtyPages)
4698 pgmPoolResetDirtyPages(pVM);
4699# endif
4700
4701 /* Mark the page as unlocked; allow flushing again. */
4702 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4703
4704 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4705 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4706 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4707 pVCpu->pgm.s.pShwPageCR3RC = 0;
4708 pVCpu->pgm.s.iShwUser = 0;
4709 pVCpu->pgm.s.iShwUserTable = 0;
4710 }
4711 pgmUnlock(pVM);
4712# endif
4713#endif /* !IN_RC*/
4714
4715 return rc;
4716}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette