VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 32778

Last change on this file since 32778 was 32778, checked in by vboxsync, 14 years ago:

Sync the entire page table for new nested page tables (partially syncing them doesn't really make sense).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 202.7 KB
1/* $Id: PGMAllBth.h 32778 2010-09-27 14:13:11Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks The nested page tables on AMD make use of PGM_SHW_TYPE in
6 * {PGM_TYPE_AMD64, PGM_TYPE_PAE and PGM_TYPE_32BIT} and PGM_GST_TYPE
7 * set to PGM_TYPE_PROT. Half of the code in this file is not
8 * exercised with PGM_SHW_TYPE set to PGM_TYPE_NESTED.
9 *
10 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
11 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
12 *
13 * @remarks This file is one big \#ifdef-orgy!
14 *
15 */
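/* This header is a template: it is included once for each supported guest/shadow
 * paging mode pair (see PGMBth.h and the mode switching code in PGMAll.cpp), with
 * PGM_GST_TYPE and PGM_SHW_TYPE defined by the includer. PGM_BTH_DECL/PGM_BTH_NAME
 * then expand every function below into a mode-specific symbol, e.g. something
 * like pgmBthPaePaeTrap0eHandler for the PAE-on-PAE instantiation. */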
16
17/*
18 * Copyright (C) 2006-2010 Oracle Corporation
19 *
20 * This file is part of VirtualBox Open Source Edition (OSE), as
21 * available from http://www.virtualbox.org. This file is free software;
22 * you can redistribute it and/or modify it under the terms of the GNU
23 * General Public License (GPL) as published by the Free Software
24 * Foundation, in version 2 as it comes in the "COPYING" file of the
25 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27 */
28
29
30/*******************************************************************************
31* Internal Functions *
32*******************************************************************************/
33RT_C_DECLS_BEGIN
34PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
35PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36static int PGM_BTH_NAME(SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
37static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
38static int PGM_BTH_NAME(SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
39static void PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
40PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
41PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
42PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
43#ifdef VBOX_STRICT
44PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
45#endif
46PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
47PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
48RT_C_DECLS_END
49
50
51/*
52 * Filter out some illegal combinations of guest and shadow paging, so we can
53 * remove redundant checks inside functions.
54 */
55#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
56# error "Invalid combination; PAE guest implies PAE shadow"
57#endif
58
59#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
60 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
61# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
62#endif
63
64#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
65 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
66# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
67#endif
68
69#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
70 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
71# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
72#endif
73
74#ifndef IN_RING3
75
76# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
77/**
78 * Deal with a guest page fault.
79 *
80 * @returns Strict VBox status code.
81 * @retval VINF_EM_RAW_GUEST_TRAP
82 * @retval VINF_EM_RAW_EMULATE_INSTR
83 *
84 * @param pVCpu The current CPU.
85 * @param pGstWalk The guest page table walk result.
86 * @param uErr The error code.
87 */
88PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPU pVCpu, PGSTPTWALK pGstWalk, RTGCUINT uErr)
89{
90# if !defined(PGM_WITHOUT_MAPPINGS) && (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE)
91 /*
92 * Check for write conflicts with our hypervisor mapping.
93 *
94 * If the guest happens to access a non-present page, where our hypervisor
95 * is currently mapped, then we'll create a #PF storm in the guest.
96 */
97 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
98 && MMHyperIsInsideArea(pVCpu->CTX_SUFF(pVM), pGstWalk->Core.GCPtr))
99 {
100 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
101 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
102 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
103 return VINF_EM_RAW_EMULATE_INSTR;
104 }
105# endif
106
107 /*
108 * Calc the error code for the guest trap.
109 */
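 /* Only the RW, US and ID bits of the incoming error code are forwarded to the
    guest, and ID only when the guest has NX enabled; the P and RSVD bits are
    reconstructed from the walk result below. */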
110 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
111 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
112 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
113 if (pGstWalk->Core.fBadPhysAddr)
114 {
115 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
116 Assert(!pGstWalk->Core.fNotPresent);
117 }
118 else if (!pGstWalk->Core.fNotPresent)
119 uNewErr |= X86_TRAP_PF_P;
120 TRPMSetErrorCode(pVCpu, uNewErr);
121
122 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pGstWalk->Core.GCPtr, uErr, pGstWalk->Core.uLevel));
123 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
124 return VINF_EM_RAW_GUEST_TRAP;
125}
126# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
127
128
129/**
130 * Deal with a guest page fault.
131 *
132 * The caller has taken the PGM lock.
133 *
134 * @returns Strict VBox status code.
135 *
136 * @param pVCpu The current CPU.
137 * @param uErr The error code.
138 * @param pRegFrame The register frame.
139 * @param pvFault The fault address.
140 * @param pPage The guest page at @a pvFault.
141 * @param pGstWalk The guest page table walk result.
142 * @param pfLockTaken PGM lock taken here or not (out). This is true
143 * when we're called.
144 */
145static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
146 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
147# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
148 , PGSTPTWALK pGstWalk
149# endif
150 )
151{
152# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
153 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
154#endif
155 PVM pVM = pVCpu->CTX_SUFF(pVM);
156 int rc;
157
158 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
159 {
160 /*
161 * Physical page access handler.
162 */
163# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
164 const RTGCPHYS GCPhysFault = pGstWalk->Core.GCPhys;
165# else
166 const RTGCPHYS GCPhysFault = (RTGCPHYS)pvFault;
167# endif
168 PPGMPHYSHANDLER pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
169 if (pCur)
170 {
171# ifdef PGM_SYNC_N_PAGES
172 /*
173 * If the region is write protected and we got a page not present fault, then sync
174 * the pages. If the fault was caused by a read, then restart the instruction.
175 * In case of write access continue to the GC write handler.
176 *
177 * ASSUMES that there is only one handler per page or that they have similar write properties.
178 */
179 if ( !(uErr & X86_TRAP_PF_P)
180 && pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE)
181 {
182# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
183 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
184# else
185 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
186# endif
187 if ( RT_FAILURE(rc)
188 || !(uErr & X86_TRAP_PF_RW)
189 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
190 {
191 AssertRC(rc);
192 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
193 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
194 return rc;
195 }
196 }
197# endif
198# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
199 /*
200 * If the access was not thru a #PF(RSVD|...) resync the page.
201 */
202 if ( !(uErr & X86_TRAP_PF_RSVD)
203 && pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
204# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
205 && pGstWalk->Core.fEffectiveRW
206 && !pGstWalk->Core.fEffectiveUS /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
207# endif
208 )
209 {
210# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
211 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
212# else
213 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
214# endif
215 if ( RT_FAILURE(rc)
216 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
217 {
218 AssertRC(rc);
219 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
220 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
221 return rc;
222 }
223 }
224# endif
225
226 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
227 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
228 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n",
229 pvFault, GCPhysFault, pPage, uErr, pCur->enmType));
230 if (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE)
231 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysWrite);
232 else
233 {
234 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysAll);
235 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysAllOpt);
236 }
237
238 if (pCur->CTX_SUFF(pfnHandler))
239 {
240 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
241 void *pvUser = pCur->CTX_SUFF(pvUser);
242# ifdef IN_RING0
243 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
244# else
245 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
246# endif
247
248 STAM_PROFILE_START(&pCur->Stat, h);
249 if (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler))
250 {
251 pgmUnlock(pVM);
252 *pfLockTaken = false;
253 }
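 /* The pool's own access handler must be called with the PGM lock held; any
    other handler is invoked without it (presumably to avoid lock order
    trouble), so tell the caller the lock is no longer taken. */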
254
255 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
256
257# ifdef VBOX_WITH_STATISTICS
258 pgmLock(pVM);
259 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
260 if (pCur)
261 STAM_PROFILE_STOP(&pCur->Stat, h);
262 pgmUnlock(pVM);
263# endif
264 }
265 else
266 rc = VINF_EM_RAW_EMULATE_INSTR;
267
268 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndPhys; });
269 return rc;
270 }
271 }
272# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
273 else
274 {
275# ifdef PGM_SYNC_N_PAGES
276 /*
277 * If the region is write protected and we got a page not present fault, then sync
278 * the pages. If the fault was caused by a read, then restart the instruction.
279 * In case of write access continue to the GC write handler.
280 */
281 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
282 && !(uErr & X86_TRAP_PF_P))
283 {
284 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
285 if ( RT_FAILURE(rc)
286 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
287 || !(uErr & X86_TRAP_PF_RW))
288 {
289 AssertRC(rc);
290 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
291 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndVirt; });
292 return rc;
293 }
294 }
295# endif
296 /*
297 * Ok, it's a virtual page access handler.
298 *
299 * Since it's faster to search by address, we'll do that first
300 * and then retry by GCPhys if that fails.
301 */
302 /** @todo r=bird: perhaps we should consider looking up by physical address directly now?
303 * r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be
304 * out of sync, because the page was changed without us noticing it (not-present -> present
305 * without invlpg or mov cr3, xxx).
306 */
307 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
308 if (pCur)
309 {
310 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
311 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
312 || !(uErr & X86_TRAP_PF_P)
313 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
314 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n",
315 pvFault, pGstWalk->Core.GCPhys, pPage, uErr, pCur->enmType));
316
317 if ( pvFault - pCur->Core.Key < pCur->cb
318 && ( uErr & X86_TRAP_PF_RW
319 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
320 {
321# ifdef IN_RC
322 STAM_PROFILE_START(&pCur->Stat, h);
323 RTGCPTR GCPtrStart = pCur->Core.Key;
324 CTX_MID(PFNPGM,VIRTHANDLER) pfnHandler = pCur->CTX_SUFF(pfnHandler);
325 pgmUnlock(pVM);
326 *pfLockTaken = false;
327
328 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPtrStart, pvFault - GCPtrStart);
329
330# ifdef VBOX_WITH_STATISTICS
331 pgmLock(pVM);
332 pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
333 if (pCur)
334 STAM_PROFILE_STOP(&pCur->Stat, h);
335 pgmUnlock(pVM);
336# endif
337# else
338 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
339# endif
340 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersVirtual);
341 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
342 return rc;
343 }
344 /* Unhandled part of a monitored page */
345 }
346 else
347 {
348 /* Check by physical address. */
349 unsigned iPage;
350 rc = pgmHandlerVirtualFindByPhysAddr(pVM, pGstWalk->Core.GCPhys, &pCur, &iPage);
351 Assert(RT_SUCCESS(rc) || !pCur);
352 if ( pCur
353 && ( uErr & X86_TRAP_PF_RW
354 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
355 {
356 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == (pGstWalk->Core.GCPhys & X86_PTE_PAE_PG_MASK));
357# ifdef IN_RC
358 STAM_PROFILE_START(&pCur->Stat, h);
359 RTGCPTR GCPtrStart = pCur->Core.Key;
360 CTX_MID(PFNPGM,VIRTHANDLER) pfnHandler = pCur->CTX_SUFF(pfnHandler);
361 pgmUnlock(pVM);
362 *pfLockTaken = false;
363
364 RTGCPTR off = (iPage << PAGE_SHIFT)
365 + (pvFault & PAGE_OFFSET_MASK)
366 - (GCPtrStart & PAGE_OFFSET_MASK);
367 Assert(off < pCur->cb);
368 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPtrStart, off);
369
370# ifdef VBOX_WITH_STATISTICS
371 pgmLock(pVM);
372 pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, GCPtrStart);
373 if (pCur)
374 STAM_PROFILE_STOP(&pCur->Stat, h);
375 pgmUnlock(pVM);
376# endif
377# else
378 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
379# endif
380 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersVirtualByPhys);
381 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
382 return rc;
383 }
384 }
385 }
386# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
387
388 /*
389 * There is a handled area of the page, but this fault doesn't belong to it.
390 * We must emulate the instruction.
391 *
392 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
393 * we first check whether this was a page-not-present fault for a page with only
394 * write access handlers. Restart the instruction if it wasn't a write access.
395 */
396 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersUnhandled);
397
398 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
399 && !(uErr & X86_TRAP_PF_P))
400 {
401# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
402 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
403# else
404 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
405# endif
406 if ( RT_FAILURE(rc)
407 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
408 || !(uErr & X86_TRAP_PF_RW))
409 {
410 AssertRC(rc);
411 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
412 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
413 return rc;
414 }
415 }
416
417 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stage of kernel booting in Ubuntu 6.06:
418 * it's writing to an unhandled part of the LDT page several million times.
419 */
420 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
421 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
422 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndUnhandled; });
423 return rc;
424} /* if any kind of handler */
425
426
427/**
428 * #PF Handler for raw-mode guest execution.
429 *
430 * @returns VBox status code (appropriate for trap handling and GC return).
431 *
432 * @param pVCpu VMCPU Handle.
433 * @param uErr The trap error code.
434 * @param pRegFrame Trap register frame.
435 * @param pvFault The fault address.
436 * @param pfLockTaken PGM lock taken here or not (out)
437 */
438PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
439{
440 PVM pVM = pVCpu->CTX_SUFF(pVM);
441
442 *pfLockTaken = false;
443
444# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
445 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
446 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
447 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
448 int rc;
449
450# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
451 /*
452 * Walk the guest page translation tables and check if it's a guest fault.
453 */
454 GSTPTWALK GstWalk;
455 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &GstWalk);
456 if (RT_FAILURE_NP(rc))
457 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
458
459 /* assert some GstWalk sanity. */
460# if PGM_GST_TYPE == PGM_TYPE_AMD64
461 AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u));
462# endif
463# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
464 AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u));
465# endif
466 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
467 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
468 Assert(GstWalk.Core.fSucceeded);
469
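 /*
  * Faults the guest's own page tables would have produced as well (write to a
  * read-only page from user mode or with CR0.WP set, user access to a
  * supervisor page, instruction fetch from a no-execute page) are reflected
  * straight back to the guest.
  */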
470 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
471 {
472 if ( ( (uErr & X86_TRAP_PF_RW)
473 && !GstWalk.Core.fEffectiveRW
474 && ( (uErr & X86_TRAP_PF_US)
475 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
476 || ((uErr & X86_TRAP_PF_US) && !GstWalk.Core.fEffectiveUS)
477 || ((uErr & X86_TRAP_PF_ID) && GstWalk.Core.fEffectiveNX)
478 )
479 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
480 }
481
482 /*
483 * Set the accessed and dirty flags.
484 */
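 /* Both the local walk copies (GstWalk.Xxx) and the guest page-table entries
    themselves (GstWalk.pXxx) are updated, mirroring what the hardware would
    have done had it walked the guest tables directly. */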
485# if PGM_GST_TYPE == PGM_TYPE_AMD64
486 GstWalk.Pml4e.u |= X86_PML4E_A;
487 GstWalk.pPml4e->u |= X86_PML4E_A;
488 GstWalk.Pdpe.u |= X86_PDPE_A;
489 GstWalk.pPdpe->u |= X86_PDPE_A;
490# endif
491 if (GstWalk.Core.fBigPage)
492 {
493 Assert(GstWalk.Pde.b.u1Size);
494 if (uErr & X86_TRAP_PF_RW)
495 {
496 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
497 GstWalk.pPde->u |= X86_PDE4M_A | X86_PDE4M_D;
498 }
499 else
500 {
501 GstWalk.Pde.u |= X86_PDE4M_A;
502 GstWalk.pPde->u |= X86_PDE4M_A;
503 }
504 }
505 else
506 {
507 Assert(!GstWalk.Pde.b.u1Size);
508 GstWalk.Pde.u |= X86_PDE_A;
509 GstWalk.pPde->u |= X86_PDE_A;
510 if (uErr & X86_TRAP_PF_RW)
511 {
512# ifdef VBOX_WITH_STATISTICS
513 if (!GstWalk.Pte.n.u1Dirty)
514 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtiedPage));
515 else
516 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageAlreadyDirty));
517# endif
518 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
519 GstWalk.pPte->u |= X86_PTE_A | X86_PTE_D;
520 }
521 else
522 {
523 GstWalk.Pte.u |= X86_PTE_A;
524 GstWalk.pPte->u |= X86_PTE_A;
525 }
526 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
527 }
528 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
529 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
530# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
531 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
532# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
533
534 /* Take the big lock now. */
535 *pfLockTaken = true;
536 pgmLock(pVM);
537
538# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
539 /*
540 * If it is a reserved bit fault we know that it is an MMIO (access
541 * handler) related fault and can skip some 200 lines of code.
542 */
543 if (uErr & X86_TRAP_PF_RSVD)
544 {
545 Assert(uErr & X86_TRAP_PF_P);
546 PPGMPAGE pPage;
547# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
548 rc = pgmPhysGetPageEx(&pVM->pgm.s, GstWalk.Core.GCPhys, &pPage);
549 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
550 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
551 pfLockTaken, &GstWalk));
552 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
553# else
554 rc = pgmPhysGetPageEx(&pVM->pgm.s, (RTGCPHYS)pvFault, &pPage);
555 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
556 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
557 pfLockTaken));
558 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
559# endif
560 AssertRC(rc);
561 PGM_INVL_PG(pVCpu, pvFault);
562 return rc; /* Restart with the corrected entry. */
563 }
564# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
565
566 /*
567 * Fetch the guest PDE, PDPE and PML4E.
568 */
569# if PGM_SHW_TYPE == PGM_TYPE_32BIT
570 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
571 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
572
573# elif PGM_SHW_TYPE == PGM_TYPE_PAE
574 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
575 PX86PDPAE pPDDst;
576# if PGM_GST_TYPE == PGM_TYPE_PAE
577 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
578# else
579 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
580# endif
581 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
582
583# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
584 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
585 PX86PDPAE pPDDst;
586# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
587 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
588 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
589# else
590 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
591# endif
592 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
593
594# elif PGM_SHW_TYPE == PGM_TYPE_EPT
595 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
596 PEPTPD pPDDst;
597 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
598 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
599# endif
600 Assert(pPDDst);
601
602# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
603 /*
604 * Dirty page handling.
605 *
606 * If we successfully correct the write protection fault due to dirty bit
607 * tracking, then return immediately.
608 */
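 /* Not-yet-dirty guest pages are shadowed read-only (PGM_PTFLAGS_TRACK_DIRTY),
    so the first guest write traps here; CheckDirtyPageFault then sets the
    guest D bit, makes the shadow PTE writable and the fault is swallowed. */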
609 if (uErr & X86_TRAP_PF_RW) /* write fault? */
610 {
611 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyBitTracking), a);
612 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
613 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyBitTracking), a);
614 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
615 {
616 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
617 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
618 ? &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2DirtyAndAccessed
619 : &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
620 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
621 return VINF_SUCCESS;
622 }
623 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
624 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
625 }
626
627# if 0 /* rarely useful; leave for debugging. */
628 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
629# endif
630# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
631
632 /*
633 * A common case is the not-present error caused by lazy page table syncing.
634 *
635 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
636 * here so we can safely assume that the shadow PT is present when calling
637 * SyncPage later.
638 *
639 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
640 * of mapping conflict and defer to SyncCR3 in R3.
641 * (Again, we do NOT support access handlers for non-present guest pages.)
642 *
643 */
644# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
645 Assert(GstWalk.Pde.n.u1Present);
646# endif
647 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
648 && !pPDDst->a[iPDDst].n.u1Present)
649 {
650 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2SyncPT; });
651# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
652 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
653 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
654# else
655 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
656 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
657# endif
658 if (RT_SUCCESS(rc))
659 return rc;
660 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
661 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
662 return VINF_PGM_SYNC_CR3;
663 }
664
665# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
666 /*
667 * Check if this address is within any of our mappings.
668 *
669 * This is *very* fast and it's gonna save us a bit of effort below and prevent
670 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
671 * (BTW, it's impossible to have physical access handlers in a mapping.)
672 */
673 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
674 {
675 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
676 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
677 {
678 if (pvFault < pMapping->GCPtr)
679 break;
680 if (pvFault - pMapping->GCPtr < pMapping->cb)
681 {
682 /*
683 * The first thing we check is if we've got an undetected conflict.
684 */
685 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
686 {
687 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
688 while (iPT-- > 0)
689 if (GstWalk.pPde[iPT].n.u1Present)
690 {
691 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eConflicts);
692 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
693 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync,right? */
694 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
695 return VINF_PGM_SYNC_CR3;
696 }
697 }
698
699 /*
700 * Check if the fault address is in a virtual page access handler range.
701 */
702 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
703 if ( pCur
704 && pvFault - pCur->Core.Key < pCur->cb
705 && uErr & X86_TRAP_PF_RW)
706 {
707# ifdef IN_RC
708 STAM_PROFILE_START(&pCur->Stat, h);
709 pgmUnlock(pVM);
710 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
711 pgmLock(pVM);
712 STAM_PROFILE_STOP(&pCur->Stat, h);
713# else
714 AssertFailed();
715 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
716# endif
717 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersMapping);
718 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
719 return rc;
720 }
721
722 /*
723 * Pretend we're not here and let the guest handle the trap.
724 */
725 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
726 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eGuestPFMapping);
727 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
728 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
729 return VINF_EM_RAW_GUEST_TRAP;
730 }
731 }
732 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
733# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
734
735 /*
736 * Check if this fault address is flagged for special treatment,
737 * which means we'll have to figure out the physical address and
738 * check flags associated with it.
739 *
740 * ASSUME that we can limit any special access handling to pages
741 * in page tables which the guest believes to be present.
742 */
743# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
744 RTGCPHYS GCPhys = GstWalk.Core.GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
745# else
746 RTGCPHYS GCPhys = (RTGCPHYS)pvFault & ~(RTGCPHYS)PAGE_OFFSET_MASK;
747# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
748 PPGMPAGE pPage;
749 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
750 if (RT_FAILURE(rc))
751 {
752 /*
753 * When the guest accesses invalid physical memory (e.g. probing
754 * of RAM or accessing a remapped MMIO range), then we'll fall
755 * back to the recompiler to emulate the instruction.
756 */
757 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
758 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersInvalid);
759 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2InvalidPhys; });
760 return VINF_EM_RAW_EMULATE_INSTR;
761 }
762
763 /*
764 * Any handlers for this page?
765 */
766 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
767# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
768 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
769 &GstWalk));
770# else
771 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
772# endif
773
774 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTimeOutOfSync, c);
775
776# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
777 if (uErr & X86_TRAP_PF_P)
778 {
779 /*
780 * The page isn't marked, but it might still be monitored by a virtual page access handler.
781 * (ASSUMES no temporary disabling of virtual handlers.)
782 */
783 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
784 * we should correct both the shadow page table and physical memory flags, and not only check for
785 * accesses within the handler region but for access to pages with virtual handlers. */
786 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
787 if (pCur)
788 {
789 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
790 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
791 || !(uErr & X86_TRAP_PF_P)
792 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
793 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
794
795 if ( pvFault - pCur->Core.Key < pCur->cb
796 && ( uErr & X86_TRAP_PF_RW
797 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
798 {
799# ifdef IN_RC
800 STAM_PROFILE_START(&pCur->Stat, h);
801 pgmUnlock(pVM);
802 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
803 pgmLock(pVM);
804 STAM_PROFILE_STOP(&pCur->Stat, h);
805# else
806 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
807# endif
808 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
809 return rc;
810 }
811 }
812 }
813# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
814
815 /*
816 * We are here only if the page is present in the guest page tables and
817 * the trap is not handled by our handlers.
818 *
819 * Check it for a page out-of-sync situation.
820 */
821 if (!(uErr & X86_TRAP_PF_P))
822 {
823 /*
824 * Page is not present in our page tables. Try to sync it!
825 */
826 if (uErr & X86_TRAP_PF_US)
827 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUser));
828 else /* supervisor */
829 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
830
831 if (PGM_PAGE_IS_BALLOONED(pPage))
832 {
833 /* Emulate reads from ballooned pages as they are not present in
834 our shadow page tables. (Required for e.g. Solaris guests; soft
835 ecc, random nr generator.) */
836 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
837 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
838 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncBallloon));
839 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Ballooned; });
840 return rc;
841 }
842
843# if defined(LOG_ENABLED) && !defined(IN_RING0)
844 RTGCPHYS GCPhys2;
845 uint64_t fPageGst2;
846 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
847# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
848 Log(("Page out of sync: %RGv eip=%08x PdeSrc.US=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
849 pvFault, pRegFrame->eip, GstWalk.Pde.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
850# else
851 Log(("Page out of sync: %RGv eip=%08x fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
852 pvFault, pRegFrame->eip, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
853# endif
854# endif /* LOG_ENABLED */
855
856# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
857 if ( !GstWalk.Core.fEffectiveUS
858 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
859 {
860 /* Note: Can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU. */
861 if ( pvFault == (RTGCPTR)pRegFrame->eip
862 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
863# ifdef CSAM_DETECT_NEW_CODE_PAGES
864 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
865 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
866# endif /* CSAM_DETECT_NEW_CODE_PAGES */
867 )
868 {
869 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
870 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
871 if (rc != VINF_SUCCESS)
872 {
873 /*
874 * CSAM needs to perform a job in ring 3.
875 *
876 * Sync the page before going to the host context; otherwise we'll end up in a loop if
877 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
878 */
879 LogFlow(("CSAM ring 3 job\n"));
880 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
881 AssertRC(rc2);
882
883 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2CSAM; });
884 return rc;
885 }
886 }
887# ifdef CSAM_DETECT_NEW_CODE_PAGES
888 else if ( uErr == X86_TRAP_PF_RW
889 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
890 && pRegFrame->ecx < 0x10000)
891 {
892 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
893 * to detect loading of new code pages.
894 */
895
896 /*
897 * Decode the instruction.
898 */
899 RTGCPTR PC;
900 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
901 &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
902 if (rc == VINF_SUCCESS)
903 {
904 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
905 uint32_t cbOp;
906 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
907
908 /* For now we'll restrict this to rep movsw/d instructions */
909 if ( rc == VINF_SUCCESS
910 && pDis->pCurInstr->opcode == OP_MOVSWD
911 && (pDis->prefix & PREFIX_REP))
912 {
913 CSAMMarkPossibleCodePage(pVM, pvFault);
914 }
915 }
916 }
917# endif /* CSAM_DETECT_NEW_CODE_PAGES */
918
919 /*
920 * Mark this page as safe.
921 */
922 /** @todo not correct for pages that contain both code and data!! */
923 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
924 CSAMMarkPage(pVM, pvFault, true);
925 }
926# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
927# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
928 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
929# else
930 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
931# endif
932 if (RT_SUCCESS(rc))
933 {
934 /* The page was successfully synced, return to the guest. */
935 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSync; });
936 return VINF_SUCCESS;
937 }
938 }
939 else /* uErr & X86_TRAP_PF_P: */
940 {
941 /*
942 * Write protected pages are made writable when the guest makes the
943 * first write to them. This happens for pages that are shared, write
944 * monitored or not yet allocated.
945 *
946 * We may also end up here when CR0.WP=0 in the guest.
947 *
948 * Also, a side effect of not flushing global PDEs is out-of-sync
949 * pages due to physically monitored regions that are no longer valid.
950 * Assume for now it only applies to the read/write flag.
951 */
952 if (uErr & X86_TRAP_PF_RW)
953 {
954 /*
955 * Check if it is a read-only page.
956 */
957 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
958 {
959 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
960 Assert(!PGM_PAGE_IS_ZERO(pPage));
961 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
962 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2MakeWritable; });
963
964 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
965 if (rc != VINF_SUCCESS)
966 {
967 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
968 return rc;
969 }
970 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
971 return VINF_EM_NO_MEMORY;
972 }
973
974# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
975 /*
976 * Check to see if we need to emulate the instruction if CR0.WP=0.
977 */
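 /* With CR0.WP clear a supervisor write to a read-only page is architecturally
    legal for the guest; rather than making the shadow PTE writable, the
    instruction is interpreted here so the shadow stays write-protected. */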
978 if ( !GstWalk.Core.fEffectiveRW
979 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
980 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
981 {
982 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
983 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
984 if (RT_SUCCESS(rc))
985 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eWPEmulInRZ);
986 else
987 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eWPEmulToR3);
988 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2WPEmulation; });
989 return rc;
990 }
991# endif
992 /// @todo count the above case; else
993 if (uErr & X86_TRAP_PF_US)
994 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
995 else /* supervisor */
996 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
997
998 /*
999 * Sync the page.
1000 *
1001 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1002 * page is not present, which is not true in this case.
1003 */
1004# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1005 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
1006# else
1007 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
1008# endif
1009 if (RT_SUCCESS(rc))
1010 {
1011 /*
1012 * Page was successfully synced, return to guest but invalidate
1013 * the TLB first as the page is very likely to be in it.
1014 */
1015# if PGM_SHW_TYPE == PGM_TYPE_EPT
1016 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
1017# else
1018 PGM_INVL_PG(pVCpu, pvFault);
1019# endif
1020# ifdef VBOX_STRICT
1021 RTGCPHYS GCPhys2;
1022 uint64_t fPageGst;
1023 if (!pVM->pgm.s.fNestedPaging)
1024 {
1025 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
1026 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%Rrc fPageGst=%RX64\n", rc, fPageGst));
1027 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
1028 }
1029 uint64_t fPageShw;
1030 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
1031 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
1032 ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
1033# endif /* VBOX_STRICT */
1034 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndObs; });
1035 return VINF_SUCCESS;
1036 }
1037 }
1038 /** @todo else: why are we here? */
1039
1040# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
1041 /*
1042 * Check for VMM page flags vs. Guest page flags consistency.
1043 * Currently only for debug purposes.
1044 */
1045 if (RT_SUCCESS(rc))
1046 {
1047 /* Get guest page flags. */
1048 uint64_t fPageGst;
1049 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
1050 if (RT_SUCCESS(rc))
1051 {
1052 uint64_t fPageShw;
1053 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
1054
1055 /*
1056 * Compare page flags.
1057 * Note: we have AVL, A, D bits desynched.
1058 */
1059 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
1060 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
1061 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n",
1062 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
1063 }
1064 else
1065 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
1066 }
1067 else
1068 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
1069# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
1070 }
1071
1072
1073 /*
1074 * If we get here it is because something failed above, i.e. most likely guru
1075 * meditation time.
1076 */
1077 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
1078 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs, pRegFrame->rip));
1079 return rc;
1080
1081# else /* Nested paging, EPT except PGM_GST_TYPE = PROT */
1082 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1083 return VERR_INTERNAL_ERROR;
1084# endif
1085}
1086#endif /* !IN_RING3 */
1087
1088
1089/**
1090 * Emulation of the invlpg instruction.
1091 *
1092 *
1093 * @returns VBox status code.
1094 *
1095 * @param pVCpu The VMCPU handle.
1096 * @param GCPtrPage Page to invalidate.
1097 *
1098 * @remark ASSUMES that the guest is updating before invalidating. This order
1099 * isn't required by the CPU, so this is speculative and could cause
1100 * trouble.
1101 * @remark No TLB shootdown is done on any other VCPU as we assume that
1102 * invlpg emulation is the *only* reason for calling this function.
1103 * (The guest has to shoot down TLB entries on other CPUs itself)
1104 * Currently true, but keep in mind!
1105 *
1106 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1107 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1108 */
1109PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1110{
1111#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1112 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1113 && PGM_SHW_TYPE != PGM_TYPE_EPT
1114 int rc;
1115 PVM pVM = pVCpu->CTX_SUFF(pVM);
1116 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1117
1118 Assert(PGMIsLockOwner(pVM));
1119
1120 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1121
1122 /*
1123 * Get the shadow PD entry and skip out if this PD isn't present.
1124 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1125 */
1126# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1127 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1128 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1129
1130 /* Fetch the pgm pool shadow descriptor. */
1131 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1132 Assert(pShwPde);
1133
1134# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1135 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1136 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1137
1138 /* If the shadow PDPE isn't present, then skip the invalidate. */
1139 if (!pPdptDst->a[iPdpt].n.u1Present)
1140 {
1141 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1142 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1143 return VINF_SUCCESS;
1144 }
1145
1146 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1147 PPGMPOOLPAGE pShwPde = NULL;
1148 PX86PDPAE pPDDst;
1149
1150 /* Fetch the pgm pool shadow descriptor. */
1151 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1152 AssertRCSuccessReturn(rc, rc);
1153 Assert(pShwPde);
1154
1155 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1156 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1157
1158# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1159 /* PML4 */
1160 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1161 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1162 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1163 PX86PDPAE pPDDst;
1164 PX86PDPT pPdptDst;
1165 PX86PML4E pPml4eDst;
1166 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1167 if (rc != VINF_SUCCESS)
1168 {
1169 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1170 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1171 return VINF_SUCCESS;
1172 }
1173 Assert(pPDDst);
1174
1175 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1176 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1177
1178 if (!pPdpeDst->n.u1Present)
1179 {
1180 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1181 return VINF_SUCCESS;
1182 }
1183
1184 /* Fetch the pgm pool shadow descriptor. */
1185 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1186 Assert(pShwPde);
1187
1188# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1189
1190 const SHWPDE PdeDst = *pPdeDst;
1191 if (!PdeDst.n.u1Present)
1192 {
1193 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1194 return VINF_SUCCESS;
1195 }
1196
1197 /*
1198 * Get the guest PD entry and calc big page.
1199 */
1200# if PGM_GST_TYPE == PGM_TYPE_32BIT
1201 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1202 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1203 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1204# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1205 unsigned iPDSrc = 0;
1206# if PGM_GST_TYPE == PGM_TYPE_PAE
1207 X86PDPE PdpeSrcIgn;
1208 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1209# else /* AMD64 */
1210 PX86PML4E pPml4eSrcIgn;
1211 X86PDPE PdpeSrcIgn;
1212 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1213# endif
1214 GSTPDE PdeSrc;
1215
1216 if (pPDSrc)
1217 PdeSrc = pPDSrc->a[iPDSrc];
1218 else
1219 PdeSrc.u = 0;
1220# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1221 const bool fIsBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1222
1223# ifdef IN_RING3
1224 /*
1225 * If a CR3 Sync is pending we may ignore the invalidate page operation
1226 * depending on the kind of sync and whether it's a global page or not.
1227 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1228 */
1229# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1230 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1231 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1232 && fIsBigPage
1233 && PdeSrc.b.u1Global
1234 )
1235 )
1236# else
1237 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1238# endif
1239 {
1240 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1241 return VINF_SUCCESS;
1242 }
1243# endif /* IN_RING3 */
1244
1245 /*
1246 * Deal with the Guest PDE.
1247 */
1248 rc = VINF_SUCCESS;
1249 if (PdeSrc.n.u1Present)
1250 {
1251 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1252 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1253# ifndef PGM_WITHOUT_MAPPING
1254 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1255 {
1256 /*
1257 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1258 */
1259 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1260 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1261 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1262 }
1263 else
1264# endif /* !PGM_WITHOUT_MAPPING */
1265 if (!fIsBigPage)
1266 {
1267 /*
1268 * 4KB - page.
1269 */
1270 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1271 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1272
1273# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1274 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1275 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1276# endif
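 /* (In the PAE-shadow/32-bit-guest case a single 1024-entry guest page table
    is shadowed by two 512-entry PAE page tables, so the low bit of the shadow
    PD index selects which half of the guest PT this shadow PT covers.) */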
1277 if (pShwPage->GCPhys == GCPhys)
1278 {
1279 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1280 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1281
1282 PGSTPT pPTSrc;
1283 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1284 if (RT_SUCCESS(rc))
1285 {
1286 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1287 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1288 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1289 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1290 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1291 GCPtrPage, PteSrc.n.u1Present,
1292 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1293 PteSrc.n.u1User & PdeSrc.n.u1User,
1294 (uint64_t)PteSrc.u,
1295 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1296 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1297 }
1298 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4KBPages));
1299 PGM_INVL_PG(pVCpu, GCPtrPage);
1300 }
1301 else
1302 {
1303 /*
1304 * The page table address changed.
1305 */
1306 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1307 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1308 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1309 ASMAtomicWriteSize(pPdeDst, 0);
1310 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1311 PGM_INVL_VCPU_TLBS(pVCpu);
1312 }
1313 }
1314 else
1315 {
1316 /*
1317 * 2/4MB - page.
1318 */
1319 /* Before freeing the page, check if anything really changed. */
1320 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1321 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1322# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1323 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1324 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1325# endif
1326 if ( pShwPage->GCPhys == GCPhys
1327 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1328 {
1329 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1330 /** @todo This test is wrong as it cannot check the G bit!
1331 * FIXME */
1332 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1333 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1334 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1335 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1336 {
1337 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1338 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1339 return VINF_SUCCESS;
1340 }
1341 }
1342
1343 /*
1344 * Ok, the page table is present and it's been changed in the guest.
1345 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1346 * We could do this for some flushes in GC too, but we need an algorithm for
1347 * deciding which 4MB pages contain code likely to be executed very soon.
1348 */
1349 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1350 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1351 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1352 ASMAtomicWriteSize(pPdeDst, 0);
1353 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4MBPages));
1354 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1355 }
1356 }
1357 else
1358 {
1359 /*
1360 * Page directory is not present, mark shadow PDE not present.
1361 */
1362 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1363 {
1364 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1365 ASMAtomicWriteSize(pPdeDst, 0);
1366 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDNPs));
1367 PGM_INVL_PG(pVCpu, GCPtrPage);
1368 }
1369 else
1370 {
1371 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1372 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDMappings));
1373 }
1374 }
1375 return rc;
1376
1377#else /* guest real and protected mode */
1378 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1379 return VINF_SUCCESS;
1380#endif
1381}
1382
1383
1384/**
1385 * Update the tracking of shadowed pages.
1386 *
1387 * @param pVCpu The VMCPU handle.
1388 * @param pShwPage The shadow page.
1389 * @param HCPhys The physical page that is being dereferenced.
1390 * @param iPte Shadow PTE index
1391 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1392 */
1393DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte, RTGCPHYS GCPhysPage)
1394{
1395 PVM pVM = pVCpu->CTX_SUFF(pVM);
1396
1397# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1398 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1399 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1400
1401 /* Use the hint we retrieved from the cached guest PT. */
1402 if (pShwPage->fDirty)
1403 {
1404 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1405
1406 Assert(pShwPage->cPresent);
1407 Assert(pPool->cPresent);
1408 pShwPage->cPresent--;
1409 pPool->cPresent--;
1410
1411 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysPage);
1412 AssertRelease(pPhysPage);
1413 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1414 return;
1415 }
1416# endif
1417
1418 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackDeref, a);
1419 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1420
1421 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1422 * 1. have a medium-sized HCPhys -> GCPhys TLB (hash?)
1423 * 2. write-protect all shadowed pages, i.e. implement caching.
1424 */
1425 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1426
1427 /*
1428 * Find the guest address.
1429 */
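/* No dirty-page hint was available, so fall back to a brute-force reverse lookup:
   scan every RAM range for the page whose host physical address matches HCPhys and
   drop the tracking reference on it (see the @todo above about speeding this up). */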
1430 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1431 pRam;
1432 pRam = pRam->CTX_SUFF(pNext))
1433 {
1434 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1435 while (iPage-- > 0)
1436 {
1437 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1438 {
1439 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1440
1441 Assert(pShwPage->cPresent);
1442 Assert(pPool->cPresent);
1443 pShwPage->cPresent--;
1444 pPool->cPresent--;
1445
1446 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1447 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackDeref, a);
1448 return;
1449 }
1450 }
1451 }
1452
1453 for (;;)
1454 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1455}
1456
1457
1458/**
1459 * Update the tracking of shadowed pages.
1460 *
1461 * @param pVCpu The VMCPU handle.
1462 * @param pShwPage The shadow page.
1463 * @param u16 The top 16 bits of pPage->HCPhys, i.e. the current tracking data.
1464 * @param pPage Pointer to the guest page. This will be modified.
1465 * @param iPTDst The index into the shadow table.
1466 */
1467DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1468{
1469 PVM pVM = pVCpu->CTX_SUFF(pVM);
1470
1471 /*
1472 * Just deal with the simple first time here.
1473 */
1474 if (!u16)
1475 {
1476 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackVirgin);
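/* First reference to this guest page: pack a reference count of one together with
   the index of the referencing shadow pool page into the 16-bit tracking word. */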
1477 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1478 /* Save the page table index. */
1479 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1480 }
1481 else
1482 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1483
1484 /* write back */
1485 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1486 PGM_PAGE_SET_TRACKING(pPage, u16);
1487
1488 /* update statistics. */
1489 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1490 pShwPage->cPresent++;
1491 if (pShwPage->iFirstPresent > iPTDst)
1492 pShwPage->iFirstPresent = iPTDst;
1493}
1494
1495
1496/**
1497 * Modifies a shadow PTE to account for access handlers.
1498 *
1499 * @param pVM The VM handle.
1500 * @param pPage The page in question.
1501 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1502 * A (accessed) bit so it can be emulated correctly.
1503 * @param pPteDst The shadow PTE (output). This is temporary storage and
1504 * does not need to be set atomically.
1505 */
1506DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVM pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1507{
1508 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1509 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1510 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1511 {
1512 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1513#if PGM_SHW_TYPE == PGM_TYPE_EPT
1514 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage);
1515 pPteDst->n.u1Present = 1;
1516 pPteDst->n.u1Execute = 1;
1517 pPteDst->n.u1IgnorePAT = 1;
1518 pPteDst->n.u3EMT = VMX_EPT_MEMTYPE_WB;
1519 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1520#else
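/* Non-EPT shadow paging: emulate the accessed bit. If the guest PTE already has A
   set, map the page read-only so writes still trap to the handler; otherwise leave
   the shadow entry empty so the first access faults and the A bit gets handled. */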
1521 if (fPteSrc & X86_PTE_A)
1522 {
1523 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1524 SHW_PTE_SET_RO(*pPteDst);
1525 }
1526 else
1527 SHW_PTE_SET(*pPteDst, 0);
1528#endif
1529 }
1530#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1531# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1532 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1533 && ( BTH_IS_NP_ACTIVE(pVM)
1534 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1535# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1536 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1537# endif
1538 )
1539 {
1540 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1541# if PGM_SHW_TYPE == PGM_TYPE_EPT
1542 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1543 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg;
1544 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1545 pPteDst->n.u1Present = 0;
1546 pPteDst->n.u1Write = 1;
1547 pPteDst->n.u1Execute = 0;
1548 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1549 pPteDst->n.u3EMT = 7;
1550# else
1551 /* Set the high page frame bits that MBZ (reserved bits on PAE, CPU dependent on AMD64). */
1552 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1553# endif
1554 }
1555# endif
1556#endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1557 else
1558 {
1559 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1560 SHW_PTE_SET(*pPteDst, 0);
1561 }
1562 /** @todo count these kinds of entries. */
1563}
1564
1565
1566/**
1567 * Creates a 4K shadow page for a guest page.
1568 *
1569 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1570 * physical address. Of the PdeSrc argument only the flags are used. No page
1571 * structures will be mapped in this function.
1572 *
1573 * @param pVCpu The VMCPU handle.
1574 * @param pPteDst Destination page table entry.
1575 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1576 * Can safely assume that only the flags are being used.
1577 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1578 * @param pShwPage Pointer to the shadow page.
1579 * @param iPTDst The index into the shadow table.
1580 *
1581 * @remark Not used for 2/4MB pages!
1582 */
1583static void PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1584 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1585{
1586 PVM pVM = pVCpu->CTX_SUFF(pVM);
1587 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1588
1589#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1590 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1591 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1592
1593 if (pShwPage->fDirty)
1594 {
1595 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1596 PGSTPT pGstPT;
1597
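/* This shadow PT is in the pool's dirty-page cache. Remember the guest physical
   address the old cached PTE pointed at (needed when dereferencing the old mapping
   further down) and refresh the cached copy of the guest PTE. */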
1598 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirty].aPage[0];
1599 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1600 pGstPT->a[iPTDst].u = PteSrc.u;
1601 }
1602#else
1603 Assert(!pShwPage->fDirty);
1604#endif
1605
1606 if ( PteSrc.n.u1Present
1607 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1608 {
1609 /*
1610 * Find the ram range.
1611 */
1612 PPGMPAGE pPage;
1613 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GST_GET_PTE_GCPHYS(PteSrc), &pPage);
1614 if (RT_SUCCESS(rc))
1615 {
1616 /* Ignore ballooned pages.
1617 Don't return errors or use a fatal assert here, as part of a
1618 shadow sync range might include ballooned pages. */
1619 if (PGM_PAGE_IS_BALLOONED(pPage))
1620 {
1621 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1622 return;
1623 }
1624
1625#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1626 /* Make the page writable if necessary. */
1627 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1628 && ( PGM_PAGE_IS_ZERO(pPage)
1629 || ( PteSrc.n.u1Write
1630 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1631# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1632 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1633# endif
1634# ifdef VBOX_WITH_PAGE_SHARING
1635 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1636# endif
1637 )
1638 )
1639 )
1640 {
1641 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
1642 AssertRC(rc);
1643 }
1644#endif
1645
1646 /*
1647 * Make page table entry.
1648 */
1649 SHWPTE PteDst;
1650 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1651 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc), &PteDst);
1652 else
1653 {
1654#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1655 /*
1656 * If the page or page directory entry is not marked accessed,
1657 * we mark the page not present.
1658 */
1659 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1660 {
1661 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1662 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,AccessedPage));
1663 SHW_PTE_SET(PteDst, 0);
1664 }
1665 /*
1666 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1667 * when the page is modified.
1668 */
1669 else if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1670 {
1671 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPage));
1672 SHW_PTE_SET(PteDst,
1673 GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc)
1674 | PGM_PAGE_GET_HCPHYS(pPage)
1675 | PGM_PTFLAGS_TRACK_DIRTY);
1676 SHW_PTE_SET_RO(PteDst);
1677 }
1678 else
1679#endif
1680 {
1681 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageSkipped));
1682#if PGM_SHW_TYPE == PGM_TYPE_EPT
1683 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1684 PteDst.n.u1Present = 1;
1685 PteDst.n.u1Write = 1;
1686 PteDst.n.u1Execute = 1;
1687 PteDst.n.u1IgnorePAT = 1;
1688 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1689 /* PteDst.n.u1Size = 0 */
1690#else
1691 SHW_PTE_SET(PteDst, GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc) | PGM_PAGE_GET_HCPHYS(pPage));
1692#endif
1693 }
1694
1695 /*
1696 * Make sure only allocated pages are mapped writable.
1697 */
1698 if ( SHW_PTE_IS_P_RW(PteDst)
1699 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1700 {
1701 /* Still applies to shared pages. */
1702 Assert(!PGM_PAGE_IS_ZERO(pPage));
1703 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why, isn't it? */
1704 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)GST_GET_PTE_GCPHYS(PteSrc), pPage, iPTDst));
1705 }
1706 }
1707
1708 /*
1709 * Keep user track up to date.
1710 */
1711 if (SHW_PTE_IS_P(PteDst))
1712 {
1713 if (!SHW_PTE_IS_P(*pPteDst))
1714 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1715 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1716 {
1717 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1718 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1719 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1720 }
1721 }
1722 else if (SHW_PTE_IS_P(*pPteDst))
1723 {
1724 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1725 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1726 }
1727
1728 /*
1729 * Update statistics and commit the entry.
1730 */
1731#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1732 if (!PteSrc.n.u1Global)
1733 pShwPage->fSeenNonGlobal = true;
1734#endif
1735 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1736 return;
1737 }
1738
1739/** @todo count these three different kinds. */
1740 Log2(("SyncPageWorker: invalid address in Pte\n"));
1741 }
1742 else if (!PteSrc.n.u1Present)
1743 Log2(("SyncPageWorker: page not present in Pte\n"));
1744 else
1745 Log2(("SyncPageWorker: invalid Pte\n"));
1746
1747 /*
1748 * The page is not present or the PTE is bad. Replace the shadow PTE by
1749 * an empty entry, making sure to keep the user tracking up to date.
1750 */
1751 if (SHW_PTE_IS_P(*pPteDst))
1752 {
1753 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1754 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1755 }
1756 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1757}
1758
1759
1760/**
1761 * Syncs a guest OS page.
1762 *
1763 * There are no conflicts at this point, neither is there any need for
1764 * page table allocations.
1765 *
1766 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1767 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1768 *
1769 * @returns VBox status code.
1770 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1771 * @param pVCpu The VMCPU handle.
1772 * @param PdeSrc Page directory entry of the guest.
1773 * @param GCPtrPage Guest context page address.
1774 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1775 * @param uErr Fault error (X86_TRAP_PF_*).
1776 */
1777static int PGM_BTH_NAME(SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1778{
1779 PVM pVM = pVCpu->CTX_SUFF(pVM);
1780 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1781 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1782
1783 Assert(PGMIsLockOwner(pVM));
1784
1785#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1786 || PGM_GST_TYPE == PGM_TYPE_PAE \
1787 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1788 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1789 && PGM_SHW_TYPE != PGM_TYPE_EPT
1790
1791 /*
1792 * Assert preconditions.
1793 */
1794 Assert(PdeSrc.n.u1Present);
1795 Assert(cPages);
1796# if 0 /* rarely useful; leave for debugging. */
1797 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1798# endif
1799
1800 /*
1801 * Get the shadow PDE, find the shadow page table in the pool.
1802 */
1803# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1804 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1805 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1806
1807 /* Fetch the pgm pool shadow descriptor. */
1808 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1809 Assert(pShwPde);
1810
1811# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1812 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1813 PPGMPOOLPAGE pShwPde = NULL;
1814 PX86PDPAE pPDDst;
1815
1816 /* Fetch the pgm pool shadow descriptor. */
1817 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1818 AssertRCSuccessReturn(rc2, rc2);
1819 Assert(pShwPde);
1820
1821 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1822 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1823
1824# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1825 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1826 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1827 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1828 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1829
1830 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1831 AssertRCSuccessReturn(rc2, rc2);
1832 Assert(pPDDst && pPdptDst);
1833 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1834# endif
1835 SHWPDE PdeDst = *pPdeDst;
1836
1837 /*
1838 * - In the guest SMP case we could have blocked while another VCPU reused
1839 * this page table.
1840 * - With W7-64 we may also take this path when the A bit is cleared on
1841 * higher level tables (PDPE/PML4E). The guest does not invalidate the
1842 * relevant TLB entries. If we're write monitoring any page mapped by
1843 * the modified entry, we may end up here with a "stale" TLB entry.
1844 */
1845 if (!PdeDst.n.u1Present)
1846 {
1847 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1848 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
1849 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1850 if (uErr & X86_TRAP_PF_P)
1851 PGM_INVL_PG(pVCpu, GCPtrPage);
1852 return VINF_SUCCESS; /* force the instruction to be executed again. */
1853 }
1854
1855 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1856 Assert(pShwPage);
1857
1858# if PGM_GST_TYPE == PGM_TYPE_AMD64
1859 /* Fetch the pgm pool shadow descriptor. */
1860 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1861 Assert(pShwPde);
1862# endif
1863
1864 /*
1865 * Check that the page is present and that the shadow PDE isn't out of sync.
1866 */
1867 const bool fBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1868 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1869 RTGCPHYS GCPhys;
1870 if (!fBigPage)
1871 {
1872 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1873# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1874 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1875 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1876# endif
1877 }
1878 else
1879 {
1880 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1881# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1882 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1883 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1884# endif
1885 }
1886 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
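/* The shadow PDE is considered in sync when it maps the same guest PT / big page
   (GCPhys match) and its U, RW and - when NX is active - NX bits are consistent
   with the guest PDE; the shadow may be less writable, e.g. for dirty tracking. */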
1887 if ( fPdeValid
1888 && pShwPage->GCPhys == GCPhys
1889 && PdeSrc.n.u1Present
1890 && PdeSrc.n.u1User == PdeDst.n.u1User
1891 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1892# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1893 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !GST_IS_NX_ACTIVE(pVCpu))
1894# endif
1895 )
1896 {
1897 /*
1898 * Check that the PDE is marked accessed already.
1899 * Since we set the accessed bit *before* getting here on a #PF, this
1900 * check is only meant for dealing with non-#PF'ing paths.
1901 */
1902 if (PdeSrc.n.u1Accessed)
1903 {
1904 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1905 if (!fBigPage)
1906 {
1907 /*
1908 * 4KB Page - Map the guest page table.
1909 */
1910 PGSTPT pPTSrc;
1911 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1912 if (RT_SUCCESS(rc))
1913 {
1914# ifdef PGM_SYNC_N_PAGES
1915 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1916 if ( cPages > 1
1917 && !(uErr & X86_TRAP_PF_P)
1918 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1919 {
1920 /*
1921 * This code path is currently only taken when the caller is PGMTrap0eHandler
1922 * for non-present pages!
1923 *
1924 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1925 * deal with locality.
1926 */
1927 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1928 const unsigned iPTDstPage = iPTDst;
1929# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1930 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1931 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1932# else
1933 const unsigned offPTSrc = 0;
1934# endif
1935 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1936 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1937 iPTDst = 0;
1938 else
1939 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1940
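/* Sync a window of up to PGM_SYNC_NR_PAGES entries centred on the faulting page,
   clamped at both ends of the page table. E.g. assuming PGM_SYNC_NR_PAGES is 8 and
   the fault hits index 5, entries 1 thru 8 get synced. */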
1941 for (; iPTDst < iPTDstEnd; iPTDst++)
1942 {
1943 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
1944
1945 if ( pPteSrc->n.u1Present
1946 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1947 {
1948 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1949 NOREF(GCPtrCurPage);
1950#ifndef IN_RING0
1951 /*
1952 * Assuming kernel code will be marked as supervisor - and not as user level
1953 * and executed using a conforming code selector - and marked as read-only.
1954 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1955 */
1956 PPGMPAGE pPage;
1957 if ( ((PdeSrc.u & pPteSrc->u) & (X86_PTE_RW | X86_PTE_US))
1958 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1959 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1960 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK))
1961 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1962 )
1963#endif /* else: CSAM not active */
1964 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
1965 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1966 GCPtrCurPage, pPteSrc->n.u1Present,
1967 pPteSrc->n.u1Write & PdeSrc.n.u1Write,
1968 pPteSrc->n.u1User & PdeSrc.n.u1User,
1969 (uint64_t)pPteSrc->u,
1970 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1971 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1972 }
1973 }
1974 }
1975 else
1976# endif /* PGM_SYNC_N_PAGES */
1977 {
1978 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1979 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1980 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1981 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1982 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1983 GCPtrPage, PteSrc.n.u1Present,
1984 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1985 PteSrc.n.u1User & PdeSrc.n.u1User,
1986 (uint64_t)PteSrc.u,
1987 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1988 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1989 }
1990 }
1991 else /* MMIO or invalid page: emulated in #PF handler. */
1992 {
1993 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1994 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
1995 }
1996 }
1997 else
1998 {
1999 /*
2000 * 4/2MB page - lazy syncing shadow 4K pages.
2001 * (There are many causes of getting here, it's no longer only CSAM.)
2002 */
2003 /* Calculate the GC physical address of this 4KB shadow page. */
2004 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
2005 /* Find ram range. */
2006 PPGMPAGE pPage;
2007 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
2008 if (RT_SUCCESS(rc))
2009 {
2010 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2011
2012# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2013 /* Try to make the page writable if necessary. */
2014 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2015 && ( PGM_PAGE_IS_ZERO(pPage)
2016 || ( PdeSrc.n.u1Write
2017 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2018# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2019 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2020# endif
2021# ifdef VBOX_WITH_PAGE_SHARING
2022 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2023# endif
2024 )
2025 )
2026 )
2027 {
2028 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2029 AssertRC(rc);
2030 }
2031# endif
2032
2033 /*
2034 * Make shadow PTE entry.
2035 */
2036 SHWPTE PteDst;
2037 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2038 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
2039 else
2040 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
2041
2042 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2043 if ( SHW_PTE_IS_P(PteDst)
2044 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2045 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2046
2047 /* Make sure only allocated pages are mapped writable. */
2048 if ( SHW_PTE_IS_P_RW(PteDst)
2049 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2050 {
2051 /* Still applies to shared pages. */
2052 Assert(!PGM_PAGE_IS_ZERO(pPage));
2053 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2054 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2055 }
2056
2057 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
2058
2059 /*
2060 * If the page is not flagged as dirty and is writable, then make it read-only
2061 * at PD level, so we can set the dirty bit when the page is modified.
2062 *
2063 * ASSUMES that page access handlers are implemented on page table entry level.
2064 * Thus we will first catch the dirty access and set PDE.D and restart. If
2065 * there is an access handler, we'll trap again and let it work on the problem.
2066 */
2067 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2068 * As for invlpg, it simply frees the whole shadow PT.
2069 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2070 if ( !PdeSrc.b.u1Dirty
2071 && PdeSrc.b.u1Write)
2072 {
2073 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2074 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2075 PdeDst.n.u1Write = 0;
2076 }
2077 else
2078 {
2079 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2080 PdeDst.n.u1Write = PdeSrc.n.u1Write;
2081 }
2082 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2083 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2084 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
2085 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2086 }
2087 else
2088 {
2089 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2090 /** @todo must wipe the shadow page table entry in this
2091 * case. */
2092 }
2093 }
2094 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2095 return VINF_SUCCESS;
2096 }
2097
2098 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPagePDNAs));
2099 }
2100 else if (fPdeValid)
2101 {
2102 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2103 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2104 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2105 }
2106 else
2107 {
2108/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
2109 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2110 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2111 }
2112
2113 /*
2114 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2115 * Yea, I'm lazy.
2116 */
2117 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2118 ASMAtomicWriteSize(pPdeDst, 0);
2119
2120 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2121 PGM_INVL_VCPU_TLBS(pVCpu);
2122 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2123
2124
2125#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2126 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2127 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2128 && !defined(IN_RC)
2129
2130# ifdef PGM_SYNC_N_PAGES
2131 /*
2132 * Get the shadow PDE, find the shadow page table in the pool.
2133 */
2134# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2135 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2136
2137# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2138 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2139
2140# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2141 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2142 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2143 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2144 X86PDEPAE PdeDst;
2145 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2146
2147 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2148 AssertRCSuccessReturn(rc, rc);
2149 Assert(pPDDst && pPdptDst);
2150 PdeDst = pPDDst->a[iPDDst];
2151# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2152 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2153 PEPTPD pPDDst;
2154 EPTPDE PdeDst;
2155
2156 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2157 if (rc != VINF_SUCCESS)
2158 {
2159 AssertRC(rc);
2160 return rc;
2161 }
2162 Assert(pPDDst);
2163 PdeDst = pPDDst->a[iPDDst];
2164# endif
2165 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2166 if (!PdeDst.n.u1Present)
2167 {
2168 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2169 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2170 return VINF_SUCCESS; /* force the instruction to be executed again. */
2171 }
2172
2173 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2174 if (PdeDst.n.u1Size)
2175 {
2176 Assert(pVM->pgm.s.fNestedPaging);
2177 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2178 return VINF_SUCCESS;
2179 }
2180
2181 /* Mask away the page offset. */
2182 GCPtrPage &= ~((RTGCPTR)0xfff);
2183
2184 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2185 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2186
2187 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2188 if ( cPages > 1
2189 && !(uErr & X86_TRAP_PF_P)
2190 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2191 {
2192 /*
2193 * This code path is currently only taken when the caller is PGMTrap0eHandler
2194 * for non-present pages!
2195 *
2196 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2197 * deal with locality.
2198 */
2199 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2200 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2201 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2202 iPTDst = 0;
2203 else
2204 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2205 for (; iPTDst < iPTDstEnd; iPTDst++)
2206 {
2207 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2208 {
2209 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2210 GSTPTE PteSrc;
2211
2212 /* Fake the page table entry */
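/* There is no guest page table to consult here (real/protected mode guest or
   nested/EPT shadowing), so fake an identity-mapped source PTE - present, writable,
   user, accessed and dirty - and let SyncPageWorker build the shadow entry from it. */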
2213 PteSrc.u = GCPtrCurPage;
2214 PteSrc.n.u1Present = 1;
2215 PteSrc.n.u1Dirty = 1;
2216 PteSrc.n.u1Accessed = 1;
2217 PteSrc.n.u1Write = 1;
2218 PteSrc.n.u1User = 1;
2219
2220 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2221 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2222 GCPtrCurPage, PteSrc.n.u1Present,
2223 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2224 PteSrc.n.u1User & PdeSrc.n.u1User,
2225 (uint64_t)PteSrc.u,
2226 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2227 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2228
2229 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2230 break;
2231 }
2232 else
2233 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2234 }
2235 }
2236 else
2237# endif /* PGM_SYNC_N_PAGES */
2238 {
2239 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2240 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2241 GSTPTE PteSrc;
2242
2243 /* Fake the page table entry */
2244 PteSrc.u = GCPtrCurPage;
2245 PteSrc.n.u1Present = 1;
2246 PteSrc.n.u1Dirty = 1;
2247 PteSrc.n.u1Accessed = 1;
2248 PteSrc.n.u1Write = 1;
2249 PteSrc.n.u1User = 1;
2250 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2251
2252 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2253 GCPtrPage, PteSrc.n.u1Present,
2254 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2255 PteSrc.n.u1User & PdeSrc.n.u1User,
2256 (uint64_t)PteSrc.u,
2257 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2258 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2259 }
2260 return VINF_SUCCESS;
2261
2262#else
2263 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2264 return VERR_INTERNAL_ERROR;
2265#endif
2266}
2267
2268
2269#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2270
2271/**
2272 * CheckPageFault helper for returning a page fault indicating a non-present
2273 * (NP) entry in the page translation structures.
2274 *
2275 * @returns VINF_EM_RAW_GUEST_TRAP.
2276 * @param pVCpu The virtual CPU to operate on.
2277 * @param uErr The error code of the shadow fault. Corrections to
2278 * TRPM's copy will be made if necessary.
2279 * @param GCPtrPage For logging.
2280 * @param uPageFaultLevel For logging.
2281 */
2282DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2283{
2284 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2285 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2286 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2287 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2288 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2289
2290 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2291 return VINF_EM_RAW_GUEST_TRAP;
2292}
2293
2294
2295/**
2296 * CheckPageFault helper for returning a page fault indicating a reserved bit
2297 * (RSVD) error in the page translation structures.
2298 *
2299 * @returns VINF_EM_RAW_GUEST_TRAP.
2300 * @param pVCpu The virtual CPU to operate on.
2301 * @param uErr The error code of the shadow fault. Corrections to
2302 * TRPM's copy will be made if necessary.
2303 * @param GCPtrPage For logging.
2304 * @param uPageFaultLevel For logging.
2305 */
2306DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2307{
2308 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2309 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2310 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2311
2312 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2313 return VINF_EM_RAW_GUEST_TRAP;
2314}
2315
2316
2317/**
2318 * CheckPageFault helper for returning a page protection fault (P).
2319 *
2320 * @returns VINF_EM_RAW_GUEST_TRAP.
2321 * @param pVCpu The virtual CPU to operate on.
2322 * @param uErr The error code of the shadow fault. Corrections to
2323 * TRPM's copy will be made if necessary.
2324 * @param GCPtrPage For logging.
2325 * @param uPageFaultLevel For logging.
2326 */
2327DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2328{
2329 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2330 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2331 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2332 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2333
2334 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2335 return VINF_EM_RAW_GUEST_TRAP;
2336}
2337
2338
2339/**
2340 * Handle dirty bit tracking faults.
2341 *
2342 * @returns VBox status code.
2343 * @param pVCpu The VMCPU handle.
2344 * @param uErr Page fault error code.
2345 * @param pPdeSrc Guest page directory entry.
2346 * @param pPdeDst Shadow page directory entry.
2347 * @param GCPtrPage Guest context page address.
2348 */
2349static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage)
2350{
2351 PVM pVM = pVCpu->CTX_SUFF(pVM);
2352 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2353
2354 Assert(PGMIsLockOwner(pVM));
2355
2356 /*
2357 * Handle big page.
2358 */
2359 if (pPdeSrc->b.u1Size && GST_IS_PSE_ACTIVE(pVCpu))
2360 {
2361 if ( pPdeDst->n.u1Present
2362 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2363 {
2364 SHWPDE PdeDst = *pPdeDst;
2365
2366 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageTrap));
2367 Assert(pPdeSrc->b.u1Write);
2368
2369 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2370 * fault again and take this path to only invalidate the entry (see below).
2371 */
2372 PdeDst.n.u1Write = 1;
2373 PdeDst.n.u1Accessed = 1;
2374 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2375 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2376 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2377 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2378 }
2379
2380# ifdef IN_RING0
2381 /* Check for stale TLB entry; only applies to the SMP guest case. */
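/* If the shadow PDE is already writable and accessed, another VCPU has granted
   write access in the meantime; the fault came from a stale TLB entry and an
   invlpg is all that is needed. */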
2382 if ( pVM->cCpus > 1
2383 && pPdeDst->n.u1Write
2384 && pPdeDst->n.u1Accessed)
2385 {
2386 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2387 if (pShwPage)
2388 {
2389 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2390 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2391 if (SHW_PTE_IS_P_RW(*pPteDst))
2392 {
2393 /* Stale TLB entry. */
2394 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageStale));
2395 PGM_INVL_PG(pVCpu, GCPtrPage);
2396 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2397 }
2398 }
2399 }
2400# endif /* IN_RING0 */
2401 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2402 }
2403
2404 /*
2405 * Map the guest page table.
2406 */
2407 PGSTPT pPTSrc;
2408 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2409 if (RT_FAILURE(rc))
2410 {
2411 AssertRC(rc);
2412 return rc;
2413 }
2414
2415 if (pPdeDst->n.u1Present)
2416 {
2417 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2418 const GSTPTE PteSrc = *pPteSrc;
2419
2420#ifndef IN_RING0
2421 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2422 * Our individual shadow handlers will provide more information and force a fatal exit.
2423 */
2424 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2425 {
2426 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2427 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2428 }
2429#endif
2430 /*
2431 * Map shadow page table.
2432 */
2433 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2434 if (pShwPage)
2435 {
2436 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2437 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2438 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2439 {
2440 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2441 {
2442 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GST_GET_PTE_GCPHYS(*pPteSrc));
2443 SHWPTE PteDst = *pPteDst;
2444
2445 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2446 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageTrap));
2447
2448 Assert(pPteSrc->n.u1Write);
2449
2450 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2451 * entry will not harm; write access will simply fault again and
2452 * take this path to only invalidate the entry.
2453 */
2454 if (RT_LIKELY(pPage))
2455 {
2456 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2457 {
2458 AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2459 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2460 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2461 SHW_PTE_SET_RO(PteDst);
2462 }
2463 else
2464 {
2465 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2466 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2467 {
2468 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(*pPteSrc));
2469 AssertRC(rc);
2470 }
2471 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2472 SHW_PTE_SET_RW(PteDst);
2473 else
2474 {
2475 /* Still applies to shared pages. */
2476 Assert(!PGM_PAGE_IS_ZERO(pPage));
2477 SHW_PTE_SET_RO(PteDst);
2478 }
2479 }
2480 }
2481 else
2482 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2483
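/* Commit the result: mark the shadow PTE accessed and dirty, drop the
   dirty-tracking marker, and flush the TLB entry so the faulting write
   can be restarted. */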
2484 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2485 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2486 PGM_INVL_PG(pVCpu, GCPtrPage);
2487 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2488 }
2489
2490# ifdef IN_RING0
2491 /* Check for stale TLB entry; only applies to the SMP guest case. */
2492 if ( pVM->cCpus > 1
2493 && SHW_PTE_IS_RW(*pPteDst)
2494 && SHW_PTE_IS_A(*pPteDst))
2495 {
2496 /* Stale TLB entry. */
2497 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageStale));
2498 PGM_INVL_PG(pVCpu, GCPtrPage);
2499 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2500 }
2501# endif
2502 }
2503 }
2504 else
2505 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2506 }
2507
2508 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2509}
2510
2511#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2512
2513
2514/**
2515 * Sync a shadow page table.
2516 *
2517 * The shadow page table is not present in the shadow PDE.
2518 *
2519 * Handles mapping conflicts.
2520 *
2521 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2522 * conflict), and Trap0eHandler.
2523 *
2524 * A precondition for this method is that the shadow PDE is not present. The
2525 * caller must take the PGM lock before checking this and continue to hold it
2526 * when calling this method.
2527 *
2528 * @returns VBox status code.
2529 * @param pVCpu The VMCPU handle.
2530 * @param iPDSrc Page directory index.
2531 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2532 * Assume this is a temporary mapping.
2533 * @param GCPtrPage GC Pointer of the page that caused the fault
2534 */
2535static int PGM_BTH_NAME(SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2536{
2537 PVM pVM = pVCpu->CTX_SUFF(pVM);
2538 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2539
2540 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2541#if 0 /* rarely useful; leave for debugging. */
2542 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2543#endif
2544 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2545
2546 Assert(PGMIsLocked(pVM));
2547
2548#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2549 || PGM_GST_TYPE == PGM_TYPE_PAE \
2550 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2551 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2552 && PGM_SHW_TYPE != PGM_TYPE_EPT
2553
2554 int rc = VINF_SUCCESS;
2555
2556 /*
2557 * Some input validation first.
2558 */
2559 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2560
2561 /*
2562 * Get the relevant shadow PDE entry.
2563 */
2564# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2565 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2566 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2567
2568 /* Fetch the pgm pool shadow descriptor. */
2569 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2570 Assert(pShwPde);
2571
2572# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2573 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2574 PPGMPOOLPAGE pShwPde = NULL;
2575 PX86PDPAE pPDDst;
2576 PSHWPDE pPdeDst;
2577
2578 /* Fetch the pgm pool shadow descriptor. */
2579 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2580 AssertRCSuccessReturn(rc, rc);
2581 Assert(pShwPde);
2582
2583 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2584 pPdeDst = &pPDDst->a[iPDDst];
2585
2586# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2587 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2588 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2589 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2590 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2591 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2592 AssertRCSuccessReturn(rc, rc);
2593 Assert(pPDDst);
2594 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2595# endif
2596 SHWPDE PdeDst = *pPdeDst;
2597
2598# if PGM_GST_TYPE == PGM_TYPE_AMD64
2599 /* Fetch the pgm pool shadow descriptor. */
2600 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2601 Assert(pShwPde);
2602# endif
2603
2604# ifndef PGM_WITHOUT_MAPPINGS
2605 /*
2606 * Check for conflicts.
2607 * RC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2608 * R3: Simply resolve the conflict.
2609 */
2610 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2611 {
2612 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2613# ifndef IN_RING3
2614 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2615 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2616 return VERR_ADDRESS_CONFLICT;
2617
2618# else /* IN_RING3 */
2619 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2620 Assert(pMapping);
2621# if PGM_GST_TYPE == PGM_TYPE_32BIT
2622 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2623# elif PGM_GST_TYPE == PGM_TYPE_PAE
2624 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2625# else
2626 AssertFailed(); /* can't happen for amd64 */
2627# endif
2628 if (RT_FAILURE(rc))
2629 {
2630 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2631 return rc;
2632 }
2633 PdeDst = *pPdeDst;
2634# endif /* IN_RING3 */
2635 }
2636# endif /* !PGM_WITHOUT_MAPPINGS */
2637 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2638
2639 /*
2640 * Sync the page directory entry.
2641 */
2642 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2643 const bool fPageTable = !PdeSrc.b.u1Size || !GST_IS_PSE_ACTIVE(pVCpu);
2644 if ( PdeSrc.n.u1Present
2645 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2646 {
2647 /*
2648 * Allocate & map the page table.
2649 */
2650 PSHWPT pPTDst;
2651 PPGMPOOLPAGE pShwPage;
2652 RTGCPHYS GCPhys;
2653 if (fPageTable)
2654 {
2655 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2656# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2657 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2658 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2659# endif
2660 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2661 }
2662 else
2663 {
2664 PGMPOOLACCESS enmAccess;
2665# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2666 const bool fNoExecute = PdeSrc.n.u1NoExecute && GST_IS_NX_ACTIVE(pVCpu);
2667# else
2668 const bool fNoExecute = false;
2669# endif
2670
2671 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2672# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2673 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2674 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2675# endif
2676 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2677 if (PdeSrc.n.u1User)
2678 {
2679 if (PdeSrc.n.u1Write)
2680 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2681 else
2682 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2683 }
2684 else
2685 {
2686 if (PdeSrc.n.u1Write)
2687 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2688 else
2689 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2690 }
2691 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, false /*fLockPage*/,
2692 &pShwPage);
2693 }
2694 if (rc == VINF_SUCCESS)
2695 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2696 else if (rc == VINF_PGM_CACHED_PAGE)
2697 {
2698 /*
2699 * The PT was cached, just hook it up.
2700 */
2701 if (fPageTable)
2702 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2703 else
2704 {
2705 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2706 /* (see explanation and assumptions further down.) */
2707 if ( !PdeSrc.b.u1Dirty
2708 && PdeSrc.b.u1Write)
2709 {
2710 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2711 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2712 PdeDst.b.u1Write = 0;
2713 }
2714 }
2715 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2716 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2717 return VINF_SUCCESS;
2718 }
2719 else if (rc == VERR_PGM_POOL_FLUSHED)
2720 {
2721 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2722 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2723 return VINF_PGM_SYNC_CR3;
2724 }
2725 else
2726 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2727 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2728 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2729 * irrelevant at this point. */
2730 PdeDst.u &= X86_PDE_AVL_MASK;
2731 PdeDst.u |= pShwPage->Core.Key;
2732
2733 /*
2734 * Page directory has been accessed (this is a fault situation, remember).
2735 */
2736 /** @todo
2737 * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2738 * fault situation. What's more, the Trap0eHandler has already set the
2739 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2740 * might need setting the accessed flag.
2741 *
2742 * The best idea is to leave this change to the caller and add an
2743 * assertion that it's set already. */
2744 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2745 if (fPageTable)
2746 {
2747 /*
2748 * Page table - 4KB.
2749 *
2750 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2751 */
2752 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2753 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2754 PGSTPT pPTSrc;
2755 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2756 if (RT_SUCCESS(rc))
2757 {
2758 /*
2759 * Start by syncing the page directory entry so CSAM's TLB trick works.
2760 */
2761 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2762 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2763 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2764 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2765
2766 /*
2767 * Directory/page user or supervisor privilege: (same goes for read/write)
2768 *
2769 * Directory Page Combined
2770 * U/S U/S U/S
2771 * 0 0 0
2772 * 0 1 0
2773 * 1 0 0
2774 * 1 1 1
2775 *
2776 * Simple AND operation. Table listed for completeness.
2777 *
2778 */
2779 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT4K));
2780# ifdef PGM_SYNC_N_PAGES
2781 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2782 unsigned iPTDst = iPTBase;
2783 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2784 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2785 iPTDst = 0;
2786 else
2787 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2788# else /* !PGM_SYNC_N_PAGES */
2789 unsigned iPTDst = 0;
2790 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2791# endif /* !PGM_SYNC_N_PAGES */
2792 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2793 | ((RTGCPTR)iPTDst << PAGE_SHIFT);
2794# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2795 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2796 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2797# else
2798 const unsigned offPTSrc = 0;
2799# endif
2800 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += PAGE_SIZE)
2801 {
2802 const unsigned iPTSrc = iPTDst + offPTSrc;
2803 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2804
2805 if (PteSrc.n.u1Present)
2806 {
2807# ifndef IN_RING0
2808 /*
2809 * Assuming kernel code will be marked as supervisor - and not as user level
2810 * and executed using a conforming code selector - and marked as read-only.
2811 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2812 */
2813 PPGMPAGE pPage;
2814 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2815 || !CSAMDoesPageNeedScanning(pVM, GCPtrCur)
2816 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, GST_GET_PTE_GCPHYS(PteSrc)))
2817 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2818 )
2819# endif
2820 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2821 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2822 GCPtrCur,
2823 PteSrc.n.u1Present,
2824 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2825 PteSrc.n.u1User & PdeSrc.n.u1User,
2826 (uint64_t)PteSrc.u,
2827 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2828 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2829 }
2830 /* else: the page table was cleared by the pool */
2831 } /* for PTEs */
2832 }
2833 }
2834 else
2835 {
2836 /*
2837 * Big page - 2/4MB.
2838 *
2839 * We'll walk the ram range list in parallel and optimize lookups.
2840 * We will only sync one shadow page table at a time.
2841 */
2842 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT4M));
2843
2844 /**
2845 * @todo It might be more efficient to sync only a part of the 4MB
2846 * page (similar to what we do for 4KB PDs).
2847 */
2848
2849 /*
2850 * Start by syncing the page directory entry.
2851 */
2852 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2853 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2854
2855 /*
2856 * If the page is not flagged as dirty and is writable, then make it read-only
2857 * at PD level, so we can set the dirty bit when the page is modified.
2858 *
2859 * ASSUMES that page access handlers are implemented on page table entry level.
2860 * Thus we will first catch the dirty access and set PDE.D and restart. If
2861 * there is an access handler, we'll trap again and let it work on the problem.
2862 */
2863 /** @todo move the above stuff to a section in the PGM documentation. */
2864 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2865 if ( !PdeSrc.b.u1Dirty
2866 && PdeSrc.b.u1Write)
2867 {
2868 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2869 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2870 PdeDst.b.u1Write = 0;
2871 }
2872 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2873 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2874
2875 /*
2876 * Fill the shadow page table.
2877 */
2878 /* Get address and flags from the source PDE. */
2879 SHWPTE PteDstBase;
2880 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2881
2882 /* Loop thru the entries in the shadow PT. */
2883 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2884 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2885 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2886 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
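/* Walk the shadow PT and the RAM range list in lock step: guest physical pages
   backed by RAM get real shadow PTEs, while pages in gaps with no RAM behind them
   are left not-present and handled at #PF time. */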
2887 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2888 unsigned iPTDst = 0;
2889 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2890 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2891 {
2892 /* Advance ram range list. */
2893 while (pRam && GCPhys > pRam->GCPhysLast)
2894 pRam = pRam->CTX_SUFF(pNext);
2895 if (pRam && GCPhys >= pRam->GCPhys)
2896 {
2897 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2898 do
2899 {
2900 /* Make shadow PTE. */
2901 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2902 SHWPTE PteDst;
2903
2904# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2905 /* Try to make the page writable if necessary. */
2906 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2907 && ( PGM_PAGE_IS_ZERO(pPage)
2908 || ( SHW_PTE_IS_RW(PteDstBase)
2909 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2910# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2911 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2912# endif
2913# ifdef VBOX_WITH_PAGE_SHARING
2914 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2915# endif
2916 && !PGM_PAGE_IS_BALLOONED(pPage))
2917 )
2918 )
2919 {
2920 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2921 AssertRCReturn(rc, rc);
2922 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2923 break;
2924 }
2925# endif
2926
2927 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2928 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2929 else if (PGM_PAGE_IS_BALLOONED(pPage))
2930 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2931# ifndef IN_RING0
2932 /*
2933                      * Assume kernel code is marked as supervisor (not user level) and is executed
2934                      * using a conforming code selector. Don't check for read-only, as that would imply
2935                      * the whole 4MB could be code or read-only data; Linux enables write access for its large pages.
2936 */
2937 else if ( !PdeSrc.n.u1User
2938 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2939 SHW_PTE_SET(PteDst, 0);
2940# endif
2941 else
2942 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2943
2944 /* Only map writable pages writable. */
2945 if ( SHW_PTE_IS_P_RW(PteDst)
2946 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2947 {
2948 /* Still applies to shared pages. */
2949 Assert(!PGM_PAGE_IS_ZERO(pPage));
2950 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2951 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2952 }
2953
2954 if (SHW_PTE_IS_P(PteDst))
2955 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2956
2957 /* commit it (not atomic, new table) */
2958 pPTDst->a[iPTDst] = PteDst;
2959 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2960 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2961 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2962
2963 /* advance */
2964 GCPhys += PAGE_SIZE;
2965 iHCPage++;
2966 iPTDst++;
2967 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2968 && GCPhys <= pRam->GCPhysLast);
2969 }
2970 else if (pRam)
2971 {
2972 Log(("Invalid pages at %RGp\n", GCPhys));
2973 do
2974 {
2975 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2976 GCPhys += PAGE_SIZE;
2977 iPTDst++;
2978 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2979 && GCPhys < pRam->GCPhys);
2980 }
2981 else
2982 {
2983 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2984 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2985 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2986 }
2987 } /* while more PTEs */
2988 } /* 4KB / 4MB */
2989 }
2990 else
2991 AssertRelease(!PdeDst.n.u1Present);
2992
2993 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2994 if (RT_FAILURE(rc))
2995 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPTFailed));
2996 return rc;
2997
2998#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2999 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3000 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3001 && !defined(IN_RC)
3002
3003 /*
3004 * Validate input a little bit.
3005 */
3006 int rc = VINF_SUCCESS;
3007# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3008 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3009 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3010
3011 /* Fetch the pgm pool shadow descriptor. */
3012 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3013 Assert(pShwPde);
3014
3015# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3016 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3017 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3018 PX86PDPAE pPDDst;
3019 PSHWPDE pPdeDst;
3020
3021 /* Fetch the pgm pool shadow descriptor. */
3022 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3023 AssertRCSuccessReturn(rc, rc);
3024 Assert(pShwPde);
3025
3026 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3027 pPdeDst = &pPDDst->a[iPDDst];
3028
3029# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3030 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3031 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3032 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3033 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3034 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3035 AssertRCSuccessReturn(rc, rc);
3036 Assert(pPDDst);
3037 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3038
3039 /* Fetch the pgm pool shadow descriptor. */
3040 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3041 Assert(pShwPde);
3042
3043# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3044 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3045 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3046 PEPTPD pPDDst;
3047 PEPTPDPT pPdptDst;
3048
3049 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3050 if (rc != VINF_SUCCESS)
3051 {
3052 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3053 AssertRC(rc);
3054 return rc;
3055 }
3056 Assert(pPDDst);
3057 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3058
3059 /* Fetch the pgm pool shadow descriptor. */
3060 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3061 Assert(pShwPde);
3062# endif
3063 SHWPDE PdeDst = *pPdeDst;
3064
3065 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3066 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3067
3068# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3069 if (BTH_IS_NP_ACTIVE(pVM))
3070 {
3071 PPGMPAGE pPage;
3072
3073 /* Check if we allocated a big page before for this 2 MB range. */
3074 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3075 if (RT_SUCCESS(rc))
3076 {
3077 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3078
3079 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3080 {
3081 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3082 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3083 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3084 }
3085 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3086 {
3087 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3088 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3089 if (RT_SUCCESS(rc))
3090 {
3091 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3092 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3093 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3094 }
3095 }
3096 else if (PGMIsUsingLargePages(pVM))
3097 {
3098 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3099 if (RT_SUCCESS(rc))
3100 {
3101 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3102 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3103 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3104 }
3105 else
3106 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3107 }
3108
3109 if (HCPhys != NIL_RTHCPHYS)
3110 {
3111 PdeDst.u &= X86_PDE_AVL_MASK;
3112 PdeDst.u |= HCPhys;
3113 PdeDst.n.u1Present = 1;
3114 PdeDst.n.u1Write = 1;
3115 PdeDst.b.u1Size = 1;
3116# if PGM_SHW_TYPE == PGM_TYPE_EPT
3117 PdeDst.n.u1Execute = 1;
3118 PdeDst.b.u1IgnorePAT = 1;
3119 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3120# else
3121 PdeDst.n.u1User = 1;
3122# endif
3123 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3124
3125 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3126 /* Add a reference to the first page only. */
3127 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3128
3129 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3130 return VINF_SUCCESS;
3131 }
3132 }
3133 }
3134# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3135
3136 GSTPDE PdeSrc;
3137 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3138 PdeSrc.n.u1Present = 1;
3139 PdeSrc.n.u1Write = 1;
3140 PdeSrc.n.u1Accessed = 1;
3141 PdeSrc.n.u1User = 1;
3142
3143 /*
3144 * Allocate & map the page table.
3145 */
3146 PSHWPT pPTDst;
3147 PPGMPOOLPAGE pShwPage;
3148 RTGCPHYS GCPhys;
3149
3150 /* Virtual address = physical address */
3151 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3152 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3153
3154 if ( rc == VINF_SUCCESS
3155 || rc == VINF_PGM_CACHED_PAGE)
3156 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3157 else
3158 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3159
3160 if (rc == VINF_SUCCESS)
3161 {
3162 /* New page table; fully set it up. */
3163 Assert(pPTDst);
3164
3165 /* Mask away the page offset. */
3166 GCPtrPage &= ~((RTGCPTR)0xfff);
3167
3168 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3169 {
3170 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
3171 GSTPTE PteSrc;
3172
3173 /* Fake the page table entry */
3174 PteSrc.u = GCPtrCurPage;
3175 PteSrc.n.u1Present = 1;
3176 PteSrc.n.u1Dirty = 1;
3177 PteSrc.n.u1Accessed = 1;
3178 PteSrc.n.u1Write = 1;
3179 PteSrc.n.u1User = 1;
3180
3181 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
3182             Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
3183 GCPtrCurPage, PteSrc.n.u1Present,
3184 PteSrc.n.u1Write & PdeSrc.n.u1Write,
3185 PteSrc.n.u1User & PdeSrc.n.u1User,
3186 (uint64_t)PteSrc.u,
3187 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
3188 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
3189
3190 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
3191 break;
3192 }
3193 }
3194 /* else cached entry; assume it's still fully valid. */
3195
3196 /* Save the new PDE. */
3197 PdeDst.u &= X86_PDE_AVL_MASK;
3198 PdeDst.u |= pShwPage->Core.Key;
3199 PdeDst.n.u1Present = 1;
3200 PdeDst.n.u1Write = 1;
3201# if PGM_SHW_TYPE == PGM_TYPE_EPT
3202 PdeDst.n.u1Execute = 1;
3203# else
3204 PdeDst.n.u1User = 1;
3205 PdeDst.n.u1Accessed = 1;
3206# endif
3207 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3208
3209 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3210 return rc;
3211
3212#else
3213 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3214 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3215 return VERR_INTERNAL_ERROR;
3216#endif
3217}
3218
3219
3220
3221/**
3222 * Prefetch a page/set of pages.
3223 *
3224 * Typically used to sync commonly used pages before entering raw mode
3225 * after a CR3 reload.
3226 *
3227 * @returns VBox status code.
3228 * @param pVCpu The VMCPU handle.
3229 * @param GCPtrPage Page to invalidate.
3230 * @param GCPtrPage Page to prefetch.
3231PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3232{
3233#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3234 || PGM_GST_TYPE == PGM_TYPE_REAL \
3235 || PGM_GST_TYPE == PGM_TYPE_PROT \
3236 || PGM_GST_TYPE == PGM_TYPE_PAE \
3237 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3238 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3239 && PGM_SHW_TYPE != PGM_TYPE_EPT
3240
3241 /*
3242 * Check that all Guest levels thru the PDE are present, getting the
3243     * PD and PDE in the process.
3244 */
3245 int rc = VINF_SUCCESS;
3246# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3247# if PGM_GST_TYPE == PGM_TYPE_32BIT
3248 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3249 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3250# elif PGM_GST_TYPE == PGM_TYPE_PAE
3251 unsigned iPDSrc;
3252 X86PDPE PdpeSrc;
3253 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3254 if (!pPDSrc)
3255 return VINF_SUCCESS; /* not present */
3256# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3257 unsigned iPDSrc;
3258 PX86PML4E pPml4eSrc;
3259 X86PDPE PdpeSrc;
3260 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3261 if (!pPDSrc)
3262 return VINF_SUCCESS; /* not present */
3263# endif
3264 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3265# else
3266 PGSTPD pPDSrc = NULL;
3267 const unsigned iPDSrc = 0;
3268 GSTPDE PdeSrc;
3269
3270 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3271 PdeSrc.n.u1Present = 1;
3272 PdeSrc.n.u1Write = 1;
3273 PdeSrc.n.u1Accessed = 1;
3274 PdeSrc.n.u1User = 1;
3275# endif
3276
3277 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3278 {
3279 PVM pVM = pVCpu->CTX_SUFF(pVM);
3280 pgmLock(pVM);
3281
3282# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3283 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3284# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3285 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3286 PX86PDPAE pPDDst;
3287 X86PDEPAE PdeDst;
3288# if PGM_GST_TYPE != PGM_TYPE_PAE
3289 X86PDPE PdpeSrc;
3290
3291 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3292 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3293# endif
3294 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3295 if (rc != VINF_SUCCESS)
3296 {
3297 pgmUnlock(pVM);
3298 AssertRC(rc);
3299 return rc;
3300 }
3301 Assert(pPDDst);
3302 PdeDst = pPDDst->a[iPDDst];
3303
3304# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3305 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3306 PX86PDPAE pPDDst;
3307 X86PDEPAE PdeDst;
3308
3309# if PGM_GST_TYPE == PGM_TYPE_PROT
3310 /* AMD-V nested paging */
3311 X86PML4E Pml4eSrc;
3312 X86PDPE PdpeSrc;
3313 PX86PML4E pPml4eSrc = &Pml4eSrc;
3314
3315 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3316 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3317 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3318# endif
3319
3320 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3321 if (rc != VINF_SUCCESS)
3322 {
3323 pgmUnlock(pVM);
3324 AssertRC(rc);
3325 return rc;
3326 }
3327 Assert(pPDDst);
3328 PdeDst = pPDDst->a[iPDDst];
3329# endif
3330 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3331 {
3332 if (!PdeDst.n.u1Present)
3333 {
3334 /** @todo r=bird: This guy will set the A bit on the PDE,
3335 * probably harmless. */
3336 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3337 }
3338 else
3339 {
3340 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3341 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3342 * makes no sense to prefetch more than one page.
3343 */
3344 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3345 if (RT_SUCCESS(rc))
3346 rc = VINF_SUCCESS;
3347 }
3348 }
3349 pgmUnlock(pVM);
3350 }
3351 return rc;
3352
3353#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3354 return VINF_SUCCESS; /* ignore */
3355#else
3356 AssertCompile(0);
3357#endif
3358}
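/*
 * Illustrative usage sketch (added commentary, not part of the original file):
 * callers normally reach the PrefetchPage worker above through the generic
 * PGMPrefetchPage() wrapper rather than this template instance directly.  The
 * helper name and the loop below are assumptions, shown only to make the
 * intended use after a CR3 reload concrete.
 */
#if 0
static void pgmSketchPrefetchAfterCR3Reload(PVMCPU pVCpu, RTGCPTR GCPtrFirst, unsigned cPages)
{
    /* Prefetch a handful of commonly used pages, one 4KB page at a time. */
    for (unsigned iPage = 0; iPage < cPages; iPage++)
    {
        int rc = PGMPrefetchPage(pVCpu, GCPtrFirst + (RTGCPTR)iPage * PAGE_SIZE);
        AssertRC(rc); /* VINF_SUCCESS is expected; non-present guest entries are simply skipped. */
    }
}
#endif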
3359
3360
3361
3362
3363/**
3364 * Syncs a page during a PGMVerifyAccess() call.
3365 *
3366 * @returns VBox status code (informational included).
3367 * @param pVCpu The VMCPU handle.
3368 * @param GCPtrPage The address of the page to sync.
3369 * @param fPage The effective guest page flags.
3370 * @param uErr The trap error code.
3371 * @remarks This will normally never be called on invalid guest page
3372 * translation entries.
3373 */
3374PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3375{
3376 PVM pVM = pVCpu->CTX_SUFF(pVM);
3377
3378 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3379
3380 Assert(!pVM->pgm.s.fNestedPaging);
3381#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3382 || PGM_GST_TYPE == PGM_TYPE_REAL \
3383 || PGM_GST_TYPE == PGM_TYPE_PROT \
3384 || PGM_GST_TYPE == PGM_TYPE_PAE \
3385 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3386 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3387 && PGM_SHW_TYPE != PGM_TYPE_EPT
3388
3389# ifndef IN_RING0
3390 if (!(fPage & X86_PTE_US))
3391 {
3392 /*
3393 * Mark this page as safe.
3394 */
3395 /** @todo not correct for pages that contain both code and data!! */
3396 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3397 CSAMMarkPage(pVM, GCPtrPage, true);
3398 }
3399# endif
3400
3401 /*
3402 * Get guest PD and index.
3403 */
3404 /** @todo Performance: We've done all this a jiffy ago in the
3405 * PGMGstGetPage call. */
3406# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3407# if PGM_GST_TYPE == PGM_TYPE_32BIT
3408 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3409 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3410
3411# elif PGM_GST_TYPE == PGM_TYPE_PAE
3412 unsigned iPDSrc = 0;
3413 X86PDPE PdpeSrc;
3414 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3415 if (RT_UNLIKELY(!pPDSrc))
3416 {
3417 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3418 return VINF_EM_RAW_GUEST_TRAP;
3419 }
3420
3421# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3422 unsigned iPDSrc = 0; /* shut up gcc */
3423 PX86PML4E pPml4eSrc = NULL; /* ditto */
3424 X86PDPE PdpeSrc;
3425 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3426 if (RT_UNLIKELY(!pPDSrc))
3427 {
3428 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3429 return VINF_EM_RAW_GUEST_TRAP;
3430 }
3431# endif
3432
3433# else /* !PGM_WITH_PAGING */
3434 PGSTPD pPDSrc = NULL;
3435 const unsigned iPDSrc = 0;
3436# endif /* !PGM_WITH_PAGING */
3437 int rc = VINF_SUCCESS;
3438
3439 pgmLock(pVM);
3440
3441 /*
3442 * First check if the shadow pd is present.
3443 */
3444# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3445 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3446
3447# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3448 PX86PDEPAE pPdeDst;
3449 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3450 PX86PDPAE pPDDst;
3451# if PGM_GST_TYPE != PGM_TYPE_PAE
3452 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3453 X86PDPE PdpeSrc;
3454 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3455# endif
3456 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3457 if (rc != VINF_SUCCESS)
3458 {
3459 pgmUnlock(pVM);
3460 AssertRC(rc);
3461 return rc;
3462 }
3463 Assert(pPDDst);
3464 pPdeDst = &pPDDst->a[iPDDst];
3465
3466# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3467 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3468 PX86PDPAE pPDDst;
3469 PX86PDEPAE pPdeDst;
3470
3471# if PGM_GST_TYPE == PGM_TYPE_PROT
3472 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3473 X86PML4E Pml4eSrc;
3474 X86PDPE PdpeSrc;
3475 PX86PML4E pPml4eSrc = &Pml4eSrc;
3476 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3477 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3478# endif
3479
3480 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3481 if (rc != VINF_SUCCESS)
3482 {
3483 pgmUnlock(pVM);
3484 AssertRC(rc);
3485 return rc;
3486 }
3487 Assert(pPDDst);
3488 pPdeDst = &pPDDst->a[iPDDst];
3489# endif
3490
3491 if (!pPdeDst->n.u1Present)
3492 {
3493 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3494 if (rc != VINF_SUCCESS)
3495 {
3496 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3497 pgmUnlock(pVM);
3498 AssertRC(rc);
3499 return rc;
3500 }
3501 }
3502
3503# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3504 /* Check for dirty bit fault */
3505 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3506 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3507 Log(("PGMVerifyAccess: success (dirty)\n"));
3508 else
3509# endif
3510 {
3511# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3512 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3513# else
3514 GSTPDE PdeSrc;
3515 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3516 PdeSrc.n.u1Present = 1;
3517 PdeSrc.n.u1Write = 1;
3518 PdeSrc.n.u1Accessed = 1;
3519 PdeSrc.n.u1User = 1;
3520# endif
3521
3522 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3523 if (uErr & X86_TRAP_PF_US)
3524 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUser));
3525 else /* supervisor */
3526 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3527
3528 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3529 if (RT_SUCCESS(rc))
3530 {
3531 /* Page was successfully synced */
3532 Log2(("PGMVerifyAccess: success (sync)\n"));
3533 rc = VINF_SUCCESS;
3534 }
3535 else
3536 {
3537 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3538 rc = VINF_EM_RAW_GUEST_TRAP;
3539 }
3540 }
3541 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3542 pgmUnlock(pVM);
3543 return rc;
3544
3545#else /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3546
3547    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3548 return VERR_INTERNAL_ERROR;
3549#endif /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3550}
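/*
 * Illustrative usage sketch (added commentary, not part of the original file):
 * the worker above backs PGMVerifyAccess(), which raw-mode components (CSAM and
 * friends) use to check that a guest page can be accessed as requested before
 * touching it.  The helper name and the exact access-flag value passed below
 * are assumptions for illustration only.
 */
#if 0
static int pgmSketchVerifyWriteAccess(PVMCPU pVCpu, RTGCPTR GCPtrPage)
{
    /* Ask PGM to verify (and, if necessary, sync) write access to the page. */
    int rc = PGMVerifyAccess(pVCpu, (RTGCUINTPTR)GCPtrPage, X86_PTE_RW);
    if (rc == VINF_EM_RAW_GUEST_TRAP)
        Log(("Page %RGv would fault in the guest; leave the trap to the guest\n", GCPtrPage));
    return rc;
}
#endif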
3551
3552
3553/**
3554 * Syncs the paging hierarchy starting at CR3.
3555 *
3556 * @returns VBox status code, no specials.
3557 * @param pVCpu The VMCPU handle.
3558 * @param cr0 Guest context CR0 register
3559 * @param cr3 Guest context CR3 register
3560 * @param cr4 Guest context CR4 register
3561 * @param fGlobal Including global page directories or not
3562 */
3563PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3564{
3565 PVM pVM = pVCpu->CTX_SUFF(pVM);
3566
3567 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3568
3569#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3570
3571 pgmLock(pVM);
3572
3573# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3574 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3575 if (pPool->cDirtyPages)
3576 pgmPoolResetDirtyPages(pVM);
3577# endif
3578
3579 /*
3580 * Update page access handlers.
3581      * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3582      * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3583      * have to look into that later because it has a bad influence on performance.
3584 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3585 * bird: Yes, but that won't work for aliases.
3586 */
3587 /** @todo this MUST go away. See #1557. */
3588 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncCR3Handlers), h);
3589 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3590 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncCR3Handlers), h);
3591 pgmUnlock(pVM);
3592#endif /* !NESTED && !EPT */
3593
3594#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3595 /*
3596 * Nested / EPT - almost no work.
3597 */
3598 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3599 return VINF_SUCCESS;
3600
3601#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3602 /*
3603 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3604 * out the shadow parts when the guest modifies its tables.
3605 */
3606 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3607 return VINF_SUCCESS;
3608
3609#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3610
3611# ifndef PGM_WITHOUT_MAPPINGS
3612 /*
3613 * Check for and resolve conflicts with our guest mappings if they
3614 * are enabled and not fixed.
3615 */
3616 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3617 {
3618 int rc = pgmMapResolveConflicts(pVM);
3619 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3620 if (rc == VINF_PGM_SYNC_CR3)
3621 {
3622 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3623 return VINF_PGM_SYNC_CR3;
3624 }
3625 }
3626# else
3627 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3628# endif
3629 return VINF_SUCCESS;
3630#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3631}
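/*
 * Illustrative sketch (added commentary, not part of the original file): the
 * SyncCR3 worker above is normally reached via PGMSyncCR3() when one of the
 * VMCPU_FF_PGM_SYNC_CR3 / VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL force-action flags
 * is pending.  The helper below is hypothetical and only restates that flow.
 */
#if 0
static void pgmSketchServiceSyncCR3FF(PVMCPU pVCpu)
{
    if (   VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
        || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
    {
        /* fGlobal selects a full resync; the non-global flag allows skipping global pages. */
        bool const fGlobal = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
        int rc = PGMSyncCR3(pVCpu, CPUMGetGuestCR0(pVCpu), CPUMGetGuestCR3(pVCpu),
                            CPUMGetGuestCR4(pVCpu), fGlobal);
        AssertRC(rc); /* Informational codes such as VINF_PGM_SYNC_CR3 are possible too. */
    }
}
#endif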
3632
3633
3634
3635
3636#ifdef VBOX_STRICT
3637# ifdef IN_RC
3638# undef AssertMsgFailed
3639# define AssertMsgFailed Log
3640# endif
3641
3642/**
3643 * Checks that the shadow page table is in sync with the guest one.
3644 *
3645 * @returns The number of errors.
3647 * @param pVCpu The VMCPU handle.
3648 * @param cr3 Guest context CR3 register
3649 * @param cr4 Guest context CR4 register
3650 * @param GCPtr Where to start. Defaults to 0.
3651 * @param cb How much to check. Defaults to everything.
3652 */
3653PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3654{
3655#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3656 return 0;
3657#else
3658 unsigned cErrors = 0;
3659 PVM pVM = pVCpu->CTX_SUFF(pVM);
3660 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3661
3662#if PGM_GST_TYPE == PGM_TYPE_PAE
3663 /** @todo currently broken; crashes below somewhere */
3664 AssertFailed();
3665#endif
3666
3667#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3668 || PGM_GST_TYPE == PGM_TYPE_PAE \
3669 || PGM_GST_TYPE == PGM_TYPE_AMD64
3670
3671 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3672 PPGMCPU pPGM = &pVCpu->pgm.s;
3673 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3674 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3675# ifndef IN_RING0
3676 RTHCPHYS HCPhys; /* general usage. */
3677# endif
3678 int rc;
3679
3680 /*
3681 * Check that the Guest CR3 and all its mappings are correct.
3682 */
3683 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3684 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3685 false);
3686# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3687# if PGM_GST_TYPE == PGM_TYPE_32BIT
3688 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3689# else
3690 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3691# endif
3692 AssertRCReturn(rc, 1);
3693 HCPhys = NIL_RTHCPHYS;
3694 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3695    AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3696# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3697 pgmGstGet32bitPDPtr(pVCpu);
3698 RTGCPHYS GCPhys;
3699 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3700 AssertRCReturn(rc, 1);
3701 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3702# endif
3703# endif /* !IN_RING0 */
3704
3705 /*
3706 * Get and check the Shadow CR3.
3707 */
3708# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3709 unsigned cPDEs = X86_PG_ENTRIES;
3710 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3711# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3712# if PGM_GST_TYPE == PGM_TYPE_32BIT
3713 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3714# else
3715 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3716# endif
3717 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3718# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3719 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3720 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3721# endif
3722 if (cb != ~(RTGCPTR)0)
3723 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3724
3725/** @todo call the other two PGMAssert*() functions. */
3726
3727# if PGM_GST_TYPE == PGM_TYPE_AMD64
3728 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3729
3730 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3731 {
3732 PPGMPOOLPAGE pShwPdpt = NULL;
3733 PX86PML4E pPml4eSrc;
3734 PX86PML4E pPml4eDst;
3735 RTGCPHYS GCPhysPdptSrc;
3736
3737 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3738 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3739
3740 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3741 if (!pPml4eDst->n.u1Present)
3742 {
3743 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3744 continue;
3745 }
3746
3747 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3748 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK;
3749
3750 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3751 {
3752 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3753 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3754 cErrors++;
3755 continue;
3756 }
3757
3758 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3759 {
3760 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3761 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3762 cErrors++;
3763 continue;
3764 }
3765
3766 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3767 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3768 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3769 {
3770 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3771 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3772 cErrors++;
3773 continue;
3774 }
3775# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3776 {
3777# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3778
3779# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3780 /*
3781 * Check the PDPTEs too.
3782 */
3783 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3784
3785 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3786 {
3787 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3788 PPGMPOOLPAGE pShwPde = NULL;
3789 PX86PDPE pPdpeDst;
3790 RTGCPHYS GCPhysPdeSrc;
3791# if PGM_GST_TYPE == PGM_TYPE_PAE
3792 X86PDPE PdpeSrc;
3793 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3794 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3795# else
3796 PX86PML4E pPml4eSrcIgn;
3797 X86PDPE PdpeSrc;
3798 PX86PDPT pPdptDst;
3799 PX86PDPAE pPDDst;
3800 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3801
3802 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3803 if (rc != VINF_SUCCESS)
3804 {
3805 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3806 GCPtr += 512 * _2M;
3807 continue; /* next PDPTE */
3808 }
3809 Assert(pPDDst);
3810# endif
3811 Assert(iPDSrc == 0);
3812
3813 pPdpeDst = &pPdptDst->a[iPdpt];
3814
3815 if (!pPdpeDst->n.u1Present)
3816 {
3817 GCPtr += 512 * _2M;
3818 continue; /* next PDPTE */
3819 }
3820
3821 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3822 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3823
3824 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3825 {
3826 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3827 GCPtr += 512 * _2M;
3828 cErrors++;
3829 continue;
3830 }
3831
3832 if (GCPhysPdeSrc != pShwPde->GCPhys)
3833 {
3834# if PGM_GST_TYPE == PGM_TYPE_AMD64
3835 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3836# else
3837 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3838# endif
3839 GCPtr += 512 * _2M;
3840 cErrors++;
3841 continue;
3842 }
3843
3844# if PGM_GST_TYPE == PGM_TYPE_AMD64
3845 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3846 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3847 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3848 {
3849 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3850 GCPtr += 512 * _2M;
3851 cErrors++;
3852 continue;
3853 }
3854# endif
3855
3856# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3857 {
3858# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3859# if PGM_GST_TYPE == PGM_TYPE_32BIT
3860 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3861# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3862 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3863# endif
3864# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3865 /*
3866 * Iterate the shadow page directory.
3867 */
3868 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3869 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3870
3871 for (;
3872 iPDDst < cPDEs;
3873 iPDDst++, GCPtr += cIncrement)
3874 {
3875# if PGM_SHW_TYPE == PGM_TYPE_PAE
3876 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3877# else
3878 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3879# endif
3880 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3881 {
3882 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3883 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3884 {
3885 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3886 cErrors++;
3887 continue;
3888 }
3889 }
3890 else if ( (PdeDst.u & X86_PDE_P)
3891 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3892 )
3893 {
3894 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3895 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3896 if (!pPoolPage)
3897 {
3898 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3899 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3900 cErrors++;
3901 continue;
3902 }
3903 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3904
3905 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3906 {
3907 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3908 GCPtr, (uint64_t)PdeDst.u));
3909 cErrors++;
3910 }
3911
3912 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3913 {
3914 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3915 GCPtr, (uint64_t)PdeDst.u));
3916 cErrors++;
3917 }
3918
3919 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3920 if (!PdeSrc.n.u1Present)
3921 {
3922 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3923 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3924 cErrors++;
3925 continue;
3926 }
3927
3928 if ( !PdeSrc.b.u1Size
3929 || !fBigPagesSupported)
3930 {
3931 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3932# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3933 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3934# endif
3935 }
3936 else
3937 {
3938# if PGM_GST_TYPE == PGM_TYPE_32BIT
3939 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3940 {
3941 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3942 GCPtr, (uint64_t)PdeSrc.u));
3943 cErrors++;
3944 continue;
3945 }
3946# endif
3947 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3948# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3949 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3950# endif
3951 }
3952
3953 if ( pPoolPage->enmKind
3954 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3955 {
3956 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3957 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3958 cErrors++;
3959 }
3960
3961 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3962 if (!pPhysPage)
3963 {
3964 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3965 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3966 cErrors++;
3967 continue;
3968 }
3969
3970 if (GCPhysGst != pPoolPage->GCPhys)
3971 {
3972 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3973 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3974 cErrors++;
3975 continue;
3976 }
3977
3978 if ( !PdeSrc.b.u1Size
3979 || !fBigPagesSupported)
3980 {
3981 /*
3982 * Page Table.
3983 */
3984 const GSTPT *pPTSrc;
3985 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3986 if (RT_FAILURE(rc))
3987 {
3988 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3989 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3990 cErrors++;
3991 continue;
3992 }
3993 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3994 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3995 {
3996 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3997 // (This problem will go away when/if we shadow multiple CR3s.)
3998 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3999 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4000 cErrors++;
4001 continue;
4002 }
4003 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4004 {
4005 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4006 GCPtr, (uint64_t)PdeDst.u));
4007 cErrors++;
4008 continue;
4009 }
4010
4011 /* iterate the page table. */
4012# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4013 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4014 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4015# else
4016 const unsigned offPTSrc = 0;
4017# endif
4018 for (unsigned iPT = 0, off = 0;
4019 iPT < RT_ELEMENTS(pPTDst->a);
4020 iPT++, off += PAGE_SIZE)
4021 {
4022 const SHWPTE PteDst = pPTDst->a[iPT];
4023
4024                             /* Skip entries that are neither present nor dirty-tracked. */
4025 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4026 continue;
4027 Assert(SHW_PTE_IS_P(PteDst));
4028
4029 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4030 if (!PteSrc.n.u1Present)
4031 {
4032# ifdef IN_RING3
4033 PGMAssertHandlerAndFlagsInSync(pVM);
4034 DBGFR3PagingDumpEx(pVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
4035 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
4036 0, 0, UINT64_MAX, 99, NULL);
4037# endif
4038 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4039 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4040 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc)*sizeof(PteSrc)));
4041 cErrors++;
4042 continue;
4043 }
4044
4045 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4046# if 1 /** @todo sync accessed bit properly... */
4047 fIgnoreFlags |= X86_PTE_A;
4048# endif
4049
4050 /* match the physical addresses */
4051 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4052 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
4053
4054# ifdef IN_RING3
4055 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4056 if (RT_FAILURE(rc))
4057 {
4058 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4059 {
4060 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4061 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4062 cErrors++;
4063 continue;
4064 }
4065 }
4066 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4067 {
4068 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4069 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4070 cErrors++;
4071 continue;
4072 }
4073# endif
4074
4075 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4076 if (!pPhysPage)
4077 {
4078# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4079 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4080 {
4081 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4082 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4083 cErrors++;
4084 continue;
4085 }
4086# endif
4087 if (SHW_PTE_IS_RW(PteDst))
4088 {
4089 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4090 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4091 cErrors++;
4092 }
4093 fIgnoreFlags |= X86_PTE_RW;
4094 }
4095 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4096 {
4097 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4098 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4099 cErrors++;
4100 continue;
4101 }
4102
4103 /* flags */
4104 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4105 {
4106 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4107 {
4108 if (SHW_PTE_IS_RW(PteDst))
4109 {
4110 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4111 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4112 cErrors++;
4113 continue;
4114 }
4115 fIgnoreFlags |= X86_PTE_RW;
4116 }
4117 else
4118 {
4119 if ( SHW_PTE_IS_P(PteDst)
4120# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4121 && !PGM_PAGE_IS_MMIO(pPhysPage)
4122# endif
4123 )
4124 {
4125 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4126 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4127 cErrors++;
4128 continue;
4129 }
4130 fIgnoreFlags |= X86_PTE_P;
4131 }
4132 }
4133 else
4134 {
4135 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4136 {
4137 if (SHW_PTE_IS_RW(PteDst))
4138 {
4139 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4140 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4141 cErrors++;
4142 continue;
4143 }
4144 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
4145 {
4146 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4147 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4148 cErrors++;
4149 continue;
4150 }
4151 if (SHW_PTE_IS_D(PteDst))
4152 {
4153 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4154 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4155 cErrors++;
4156 }
4157# if 0 /** @todo sync access bit properly... */
4158 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4159 {
4160                                 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4161 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4162 cErrors++;
4163 }
4164 fIgnoreFlags |= X86_PTE_RW;
4165# else
4166 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4167# endif
4168 }
4169 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4170 {
4171 /* access bit emulation (not implemented). */
4172 if (PteSrc.n.u1Accessed || SHW_PTE_IS_P(PteDst))
4173 {
4174 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4175 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4176 cErrors++;
4177 continue;
4178 }
4179 if (!SHW_PTE_IS_A(PteDst))
4180 {
4181                                 AssertMsgFailed(("!ACCESSED page at %RGv doesn't have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4182 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4183 cErrors++;
4184 }
4185 fIgnoreFlags |= X86_PTE_P;
4186 }
4187# ifdef DEBUG_sandervl
4188 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4189# endif
4190 }
4191
4192 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4193 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4194 )
4195 {
4196 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4197 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4198 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4199 cErrors++;
4200 continue;
4201 }
4202 } /* foreach PTE */
4203 }
4204 else
4205 {
4206 /*
4207 * Big Page.
4208 */
4209 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4210 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4211 {
4212 if (PdeDst.n.u1Write)
4213 {
4214 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4215 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4216 cErrors++;
4217 continue;
4218 }
4219 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4220 {
4221                                 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4222 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4223 cErrors++;
4224 continue;
4225 }
4226# if 0 /** @todo sync access bit properly... */
4227 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4228 {
4229                                 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4230 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4231 cErrors++;
4232 }
4233 fIgnoreFlags |= X86_PTE_RW;
4234# else
4235 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4236# endif
4237 }
4238 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4239 {
4240 /* access bit emulation (not implemented). */
4241 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4242 {
4243 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4244 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4245 cErrors++;
4246 continue;
4247 }
4248 if (!PdeDst.n.u1Accessed)
4249 {
4250                                 AssertMsgFailed(("!ACCESSED page at %RGv doesn't have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4251 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4252 cErrors++;
4253 }
4254 fIgnoreFlags |= X86_PTE_P;
4255 }
4256
4257 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4258 {
4259 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4260 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4261 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4262 cErrors++;
4263 }
4264
4265 /* iterate the page table. */
4266 for (unsigned iPT = 0, off = 0;
4267 iPT < RT_ELEMENTS(pPTDst->a);
4268 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4269 {
4270 const SHWPTE PteDst = pPTDst->a[iPT];
4271
4272 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4273 {
4274 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4275 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4276 cErrors++;
4277 }
4278
4279 /* skip not-present entries. */
4280 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4281 continue;
4282
4283 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4284
4285 /* match the physical addresses */
4286 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4287
4288# ifdef IN_RING3
4289 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4290 if (RT_FAILURE(rc))
4291 {
4292 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4293 {
4294 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4295 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4296 cErrors++;
4297 }
4298 }
4299 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4300 {
4301 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4302 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4303 cErrors++;
4304 continue;
4305 }
4306# endif
4307 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4308 if (!pPhysPage)
4309 {
4310# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4311 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4312 {
4313 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4314 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4315 cErrors++;
4316 continue;
4317 }
4318# endif
4319 if (SHW_PTE_IS_RW(PteDst))
4320 {
4321 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4322 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4323 cErrors++;
4324 }
4325 fIgnoreFlags |= X86_PTE_RW;
4326 }
4327 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4328 {
4329 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4330 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4331 cErrors++;
4332 continue;
4333 }
4334
4335 /* flags */
4336 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4337 {
4338 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4339 {
4340 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4341 {
4342 if (SHW_PTE_IS_RW(PteDst))
4343 {
4344 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4345 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4346 cErrors++;
4347 continue;
4348 }
4349 fIgnoreFlags |= X86_PTE_RW;
4350 }
4351 }
4352 else
4353 {
4354 if ( SHW_PTE_IS_P(PteDst)
4355# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4356 && !PGM_PAGE_IS_MMIO(pPhysPage)
4357# endif
4358 )
4359 {
4360 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4361 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4362 cErrors++;
4363 continue;
4364 }
4365 fIgnoreFlags |= X86_PTE_P;
4366 }
4367 }
4368
4369 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4370 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4371 )
4372 {
4373 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4374 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4375 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4376 cErrors++;
4377 continue;
4378 }
4379 } /* for each PTE */
4380 }
4381 }
4382 /* not present */
4383
4384 } /* for each PDE */
4385
4386 } /* for each PDPTE */
4387
4388 } /* for each PML4E */
4389
4390# ifdef DEBUG
4391 if (cErrors)
4392 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4393# endif
4394
4395#endif /* GST == 32BIT, PAE or AMD64 */
4396 return cErrors;
4397
4398#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4399}
4400#endif /* VBOX_STRICT */
4401
4402
4403/**
4404 * Sets up the CR3 for shadow paging
4405 *
4406 * @returns Strict VBox status code.
4407 * @retval VINF_SUCCESS.
4408 *
4409 * @param pVCpu The VMCPU handle.
4410 * @param GCPhysCR3 The physical address in the CR3 register.
4411 */
4412PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4413{
4414 PVM pVM = pVCpu->CTX_SUFF(pVM);
4415
4416 /* Update guest paging info. */
4417#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4418 || PGM_GST_TYPE == PGM_TYPE_PAE \
4419 || PGM_GST_TYPE == PGM_TYPE_AMD64
4420
4421 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4422
4423 /*
4424 * Map the page CR3 points at.
4425 */
4426 RTHCPTR HCPtrGuestCR3;
4427 RTHCPHYS HCPhysGuestCR3;
4428 pgmLock(pVM);
4429 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4430 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4431 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4432 /** @todo this needs some reworking wrt. locking? */
4433# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4434 HCPtrGuestCR3 = NIL_RTHCPTR;
4435 int rc = VINF_SUCCESS;
4436# else
4437 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4438# endif
4439 pgmUnlock(pVM);
4440 if (RT_SUCCESS(rc))
4441 {
4442 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4443 if (RT_SUCCESS(rc))
4444 {
4445# ifdef IN_RC
4446 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4447# endif
4448# if PGM_GST_TYPE == PGM_TYPE_32BIT
4449 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4450# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4451 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4452# endif
4453 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4454
4455# elif PGM_GST_TYPE == PGM_TYPE_PAE
4456 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4457 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4458# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4459 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4460# endif
4461 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4462 LogFlow(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4463
4464 /*
4465 * Map the 4 PDs too.
4466 */
4467 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
4468 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4469 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4470 {
4471 if (pGuestPDPT->a[i].n.u1Present)
4472 {
4473 RTHCPTR HCPtr;
4474 RTHCPHYS HCPhys;
4475 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4476 pgmLock(pVM);
4477 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4478 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4479 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4480# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4481 HCPtr = NIL_RTHCPTR;
4482 int rc2 = VINF_SUCCESS;
4483# else
4484 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4485# endif
4486 pgmUnlock(pVM);
4487 if (RT_SUCCESS(rc2))
4488 {
4489 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4490 AssertRCReturn(rc, rc);
4491
4492 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4493# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4494 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4495# endif
4496 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4497 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4498# ifdef IN_RC
4499 PGM_INVL_PG(pVCpu, GCPtr);
4500# endif
4501 continue;
4502 }
4503                    AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4504 }
4505
4506 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4507# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4508 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4509# endif
4510 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4511 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4512# ifdef IN_RC
4513 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4514# endif
4515 }
4516
4517# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4518 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4519# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4520 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4521# endif
4522# endif
4523 }
4524 else
4525 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4526 }
4527 else
4528 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4529
4530#else /* prot/real stub */
4531 int rc = VINF_SUCCESS;
4532#endif
4533
4534 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4535# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4536 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4537 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4538 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4539 && PGM_GST_TYPE != PGM_TYPE_PROT))
4540
4541 Assert(!pVM->pgm.s.fNestedPaging);
4542
4543 /*
4544 * Update the shadow root page as well since that's not fixed.
4545 */
4546 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4547 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4548 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4549 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4550 PPGMPOOLPAGE pNewShwPageCR3;
4551
4552 pgmLock(pVM);
4553
4554# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4555 if (pPool->cDirtyPages)
4556 pgmPoolResetDirtyPages(pVM);
4557# endif
4558
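 /* Allocate and lock a pool page to serve as the new shadow root (CR3) for this guest CR3. */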
4559 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4560 rc = pgmPoolAllocEx(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, SHW_POOL_ROOT_IDX,
4561 GCPhysCR3 >> PAGE_SHIFT, true /*fLockPage*/, &pNewShwPageCR3);
4562 AssertFatalRC(rc);
4563 rc = VINF_SUCCESS;
4564
4565# ifdef IN_RC
4566 /*
4567 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4568 * state will be inconsistent! Flush important things now while
4569 * we still can and then make sure there are no ring-3 calls.
4570 */
4571 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4572 VMMRZCallRing3Disable(pVCpu);
4573# endif
4574
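 /* Switch to the new shadow root and refresh the ring-3, ring-0 and RC pointers to it. */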
4575 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4576 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4577 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4578# ifdef IN_RING0
4579 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4580 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4581# elif defined(IN_RC)
4582 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4583 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4584# else
4585 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4586 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4587# endif
4588
4589# ifndef PGM_WITHOUT_MAPPINGS
4590 /*
4591 * Apply all hypervisor mappings to the new CR3.
4592 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4593 * make sure we check for conflicts in the new CR3 root.
4594 */
4595# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4596 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4597# endif
4598 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4599 AssertRCReturn(rc, rc);
4600# endif
4601
4602 /* Set the current hypervisor CR3. */
4603 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4604 SELMShadowCR3Changed(pVM, pVCpu);
4605
4606# ifdef IN_RC
4607 /* NOTE: The state is consistent again. */
4608 VMMRZCallRing3Enable(pVCpu);
4609# endif
4610
4611 /* Clean up the old CR3 root. */
4612 if ( pOldShwPageCR3
4613 && pOldShwPageCR3 != pNewShwPageCR3 /** @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4614 {
4615 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4616# ifndef PGM_WITHOUT_MAPPINGS
4617 /* Remove the hypervisor mappings from the shadow page table. */
4618 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4619# endif
4620 /* Mark the page as unlocked; allow flushing again. */
4621 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4622
4623 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4624 }
4625 pgmUnlock(pVM);
4626# endif
4627
4628 return rc;
4629}
4630
4631/**
4632 * Unmaps the shadow CR3.
4633 *
4634 * @returns VBox status code, no special status codes returned.
4635 * @param pVCpu The VMCPU handle.
4636 */
4637PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4638{
4639 LogFlow(("UnmapCR3\n"));
4640
4641 int rc = VINF_SUCCESS;
4642 PVM pVM = pVCpu->CTX_SUFF(pVM);
4643
4644 /*
4645 * Update guest paging info.
4646 */
4647#if PGM_GST_TYPE == PGM_TYPE_32BIT
4648 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4649# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4650 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4651# endif
4652 pVCpu->pgm.s.pGst32BitPdRC = 0;
4653
4654#elif PGM_GST_TYPE == PGM_TYPE_PAE
4655 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4656# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4657 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4658# endif
4659 pVCpu->pgm.s.pGstPaePdptRC = 0;
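 /* Drop the cached per-entry PAE page directory mappings as well. */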
4660 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4661 {
4662 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4663# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4664 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4665# endif
4666 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4667 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4668 }
4669
4670#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4671 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4672# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4673 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4674# endif
4675
4676#else /* prot/real mode stub */
4677 /* nothing to do */
4678#endif
4679
4680#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4681 /*
4682 * Update shadow paging info.
4683 */
4684# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4685 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4686 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4687
4688# if PGM_GST_TYPE != PGM_TYPE_REAL
4689 Assert(!pVM->pgm.s.fNestedPaging);
4690# endif
4691
4692 pgmLock(pVM);
4693
4694# ifndef PGM_WITHOUT_MAPPINGS
4695 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4696 /* Remove the hypervisor mappings from the shadow page table. */
4697 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4698# endif
4699
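 /* Unlock the shadow root page, return it to the pool and clear the cached pointers. */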
4700 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4701 {
4702 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4703
4704 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4705
4706# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4707 if (pPool->cDirtyPages)
4708 pgmPoolResetDirtyPages(pVM);
4709# endif
4710
4711 /* Mark the page as unlocked; allow flushing again. */
4712 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4713
4714 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4715 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4716 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4717 pVCpu->pgm.s.pShwPageCR3RC = 0;
4718 pVCpu->pgm.s.iShwUser = 0;
4719 pVCpu->pgm.s.iShwUserTable = 0;
4720 }
4721 pgmUnlock(pVM);
4722# endif
4723#endif /* !IN_RC */
4724
4725 return rc;
4726}