source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 31136

Last change on this file: r31136, checked in by vboxsync:

PGM: cache the last physical handler lookup result in each ring.

1/* $Id: PGMAllBth.h 31136 2010-07-27 12:06:18Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks The nested page tables on AMD make use of PGM_SHW_TYPE in
6 * {PGM_TYPE_AMD64, PGM_TYPE_PAE and PGM_TYPE_32BIT} and PGM_GST_TYPE
7 * set to PGM_TYPE_PROT. Half of the code in this file is not
8 * exercised with PGM_SHW_TYPE set to PGM_TYPE_NESTED.
9 *
10 * @remarks Extended page tables (Intel) are built with PGM_GST_TYPE set to
11 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
12 *
13 * @remarks This file is one big \#ifdef-orgy!
14 *
15 */
16
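/* Illustrative note (added; not part of the original header): the mode-specific
 * variants are assumed to be instantiated by the including source files
 * (e.g. PGMAll.cpp) along these lines, so that every function below is
 * compiled once per supported guest/shadow paging combination:
 *
 *     #define PGM_GST_TYPE        PGM_TYPE_32BIT
 *     #define PGM_SHW_TYPE        PGM_TYPE_PAE
 *     #define PGM_BTH_NAME(name)  PGM_BTH_NAME_PAE_32BIT(name)
 *     #include "PGMAllBth.h"
 *     #undef  PGM_GST_TYPE
 *     #undef  PGM_SHW_TYPE
 *     #undef  PGM_BTH_NAME
 *
 * The exact macro names above are an assumption; see PGMInternal.h and the
 * including source files for the authoritative definitions.
 */
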
17/*
18 * Copyright (C) 2006-2010 Oracle Corporation
19 *
20 * This file is part of VirtualBox Open Source Edition (OSE), as
21 * available from http://www.virtualbox.org. This file is free software;
22 * you can redistribute it and/or modify it under the terms of the GNU
23 * General Public License (GPL) as published by the Free Software
24 * Foundation, in version 2 as it comes in the "COPYING" file of the
25 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27 */
28
29
30/*******************************************************************************
31* Internal Functions *
32*******************************************************************************/
33RT_C_DECLS_BEGIN
34PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
35PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
37PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
38PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
39PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
40PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
41PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
42PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
43#ifdef VBOX_STRICT
44PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
45#endif
46DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte);
47PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
48PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
49RT_C_DECLS_END
50
51
52/*
53 * Filter out some illegal combinations of guest and shadow paging, so we can
54 * remove redundant checks inside functions.
55 */
56#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
57# error "Invalid combination; PAE guest implies PAE shadow"
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
61 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
62# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
63#endif
64
65#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
66 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
67# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
68#endif
69
70#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
71 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
72# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
73#endif
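
/* For reference, the guest/shadow combinations the checks above let through
 * (derived purely from the #error conditions; added for readability):
 *      guest REAL:   shadow 32BIT, PAE, NESTED or EPT
 *      guest PROT:   shadow 32BIT, PAE, AMD64, NESTED or EPT
 *      guest 32BIT:  shadow 32BIT, PAE, NESTED or EPT
 *      guest PAE:    shadow PAE, NESTED or EPT
 *      guest AMD64:  shadow AMD64, NESTED or EPT
 */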
74
75#ifndef IN_RING3
76
77# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
78/**
79 * Deal with a guest page fault.
80 *
81 * @returns Strict VBox status code.
82 * @retval VINF_EM_RAW_GUEST_TRAP
83 * @retval VINF_EM_RAW_EMULATE_INSTR
84 *
85 * @param pVCpu The current CPU.
86 * @param pGstWalk The guest page table walk result.
87 * @param uErr The error code.
88 */
89PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPU pVCpu, PGSTPTWALK pGstWalk, RTGCUINT uErr)
90{
91# if !defined(PGM_WITHOUT_MAPPINGS) && (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE)
92 /*
93 * Check for write conflicts with our hypervisor mapping.
94 *
95 * If the guest happens to access a non-present page, where our hypervisor
96 * is currently mapped, then we'll create a #PF storm in the guest.
97 */
98 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
99 && MMHyperIsInsideArea(pVCpu->CTX_SUFF(pVM), pGstWalk->Core.GCPtr))
100 {
101 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
102 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
103 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
104 return VINF_EM_RAW_EMULATE_INSTR;
105 }
106# endif
107
108 /*
109 * Calc the error code for the guest trap.
110 */
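    /* For reference: the X86_TRAP_PF_* masks used here follow the x86 #PF error
       code layout: P (bit 0, set = protection violation, clear = page not
       present), RW (bit 1, write access), US (bit 2, user-mode access),
       RSVD (bit 3, reserved bit violation) and ID (bit 4, instruction fetch). */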
111 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
112 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
113 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
114 if (pGstWalk->Core.fBadPhysAddr)
115 {
116 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
117 Assert(!pGstWalk->Core.fNotPresent);
118 }
119 else if (!pGstWalk->Core.fNotPresent)
120 uNewErr |= X86_TRAP_PF_P;
121 TRPMSetErrorCode(pVCpu, uNewErr);
122
123 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
124 return VINF_EM_RAW_GUEST_TRAP;
125}
126# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
127
128
129/**
130 * Deal with a guest page fault.
131 *
132 * @returns Strict VBox status code.
133 *
134 * @param pVCpu The current CPU.
135 * @param uErr The error code.
136 * @param pRegFrame The register frame.
137 * @param pvFault The fault address.
138 * @param pPage The guest page at @a pvFault.
139 * @param pGstWalk The guest page table walk result.
140 */
141static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
142# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
143 RTGCPTR pvFault, PPGMPAGE pPage, PGSTPTWALK pGstWalk)
144# else
145 RTGCPTR pvFault, PPGMPAGE pPage)
146# endif
147{
148# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
149 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A};
150# endif
151 PVM pVM = pVCpu->CTX_SUFF(pVM);
152 int rc;
153
154 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
155 {
156 /*
157 * Physical page access handler.
158 */
159# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
160 const RTGCPHYS GCPhysFault = pGstWalk->Core.GCPhys;
161# else
162 const RTGCPHYS GCPhysFault = (RTGCPHYS)pvFault;
163# endif
164 PPGMPHYSHANDLER pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
165 if (pCur)
166 {
167# ifdef PGM_SYNC_N_PAGES
168 /*
169 * If the region is write protected and we got a page not present fault, then sync
170 * the pages. If the fault was caused by a read, then restart the instruction.
171 * In case of write access continue to the GC write handler.
172 *
173 * ASSUMES that there is only one handler per page or that they have similar write properties.
174 */
175 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
176 && !(uErr & X86_TRAP_PF_P))
177 {
178# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
179 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
180# else
181 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
182# endif
183 if ( RT_FAILURE(rc)
184 || !(uErr & X86_TRAP_PF_RW)
185 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
186 {
187 AssertRC(rc);
188 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
189 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
190 return rc;
191 }
192 }
193# endif
194
195 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
196 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
197 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n",
198 pvFault, GCPhysFault, pPage, uErr, pCur->enmType));
199
200# if defined(IN_RC) || defined(IN_RING0) /** @todo remove this */
201 if (pCur->CTX_SUFF(pfnHandler))
202 {
203 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
204# ifdef IN_RING0
205 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
206# else
207 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
208# endif
209 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
210 void *pvUser = pCur->CTX_SUFF(pvUser);
211
212 STAM_PROFILE_START(&pCur->Stat, h);
213 if (fLeaveLock)
214 pgmUnlock(pVM); /** @todo: Not entirely safe. */
215
216 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
217 if (fLeaveLock)
218 pgmLock(pVM);
219# ifdef VBOX_WITH_STATISTICS
220 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
221 if (pCur)
222 STAM_PROFILE_STOP(&pCur->Stat, h);
223# else
224 pCur = NULL; /* might be invalid by now. */
225# endif
226
227 }
228 else
229# endif /* IN_RC || IN_RING0 */
230 rc = VINF_EM_RAW_EMULATE_INSTR;
231
232 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersPhysical);
233 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndPhys; });
234 return rc;
235 }
236 }
237# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
238 else
239 {
240# ifdef PGM_SYNC_N_PAGES
241 /*
242 * If the region is write protected and we got a page not present fault, then sync
243 * the pages. If the fault was caused by a read, then restart the instruction.
244 * In case of write access continue to the GC write handler.
245 */
246 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
247 && !(uErr & X86_TRAP_PF_P))
248 {
249 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
250 if ( RT_FAILURE(rc)
251 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
252 || !(uErr & X86_TRAP_PF_RW))
253 {
254 AssertRC(rc);
255 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
256 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndVirt; });
257 return rc;
258 }
259 }
260# endif
261 /*
262 * Ok, it's a virtual page access handler.
263 *
264 * Since it's faster to search by address, we'll do that first
265 * and then retry by GCPhys if that fails.
266 */
267 /** @todo r=bird: perhaps we should consider looking up by physical address directly now?
268 * r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be
269 * out of sync, because the page was changed without us noticing it (not-present -> present
270 * without invlpg or mov cr3, xxx).
271 */
272 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
273 if (pCur)
274 {
275 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
276 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
277 || !(uErr & X86_TRAP_PF_P)
278 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
279 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n",
280 pvFault, pGstWalk->Core.GCPhys, pPage, uErr, pCur->enmType));
281
282 if ( pvFault - pCur->Core.Key < pCur->cb
283 && ( uErr & X86_TRAP_PF_RW
284 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
285 {
286# ifdef IN_RC
287 STAM_PROFILE_START(&pCur->Stat, h);
288 pgmUnlock(pVM);
289 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
290 pgmLock(pVM);
291 STAM_PROFILE_STOP(&pCur->Stat, h);
292# else
293 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
294# endif
295 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersVirtual);
296 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
297 return rc;
298 }
299 /* Unhandled part of a monitored page */
300 }
301 else
302 {
303 /* Check by physical address. */
304 unsigned iPage;
305 rc = pgmHandlerVirtualFindByPhysAddr(pVM, pGstWalk->Core.GCPhys, &pCur, &iPage);
306 Assert(RT_SUCCESS(rc) || !pCur);
307 if ( pCur
308 && ( uErr & X86_TRAP_PF_RW
309 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
310 {
311 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == (pGstWalk->Core.GCPhys & X86_PTE_PAE_PG_MASK));
312# ifdef IN_RC
313 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
314 Assert(off < pCur->cb);
315 STAM_PROFILE_START(&pCur->Stat, h);
316 pgmUnlock(pVM);
317 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
318 pgmLock(pVM);
319 STAM_PROFILE_STOP(&pCur->Stat, h);
320# else
321 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
322# endif
323 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersVirtualByPhys);
324 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
325 return rc;
326 }
327 }
328 }
329# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
330
331 /*
332 * There is a handled area of the page, but this fault doesn't belong to it.
333 * We must emulate the instruction.
334 *
335 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
336 * we first check whether this was a page-not-present fault for a page with only
337 * write access handlers. Restart the instruction if it wasn't a write access.
338 */
339 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersUnhandled);
340
341 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
342 && !(uErr & X86_TRAP_PF_P))
343 {
344# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
345 rc = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
346# else
347 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
348# endif
349 if ( RT_FAILURE(rc)
350 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
351 || !(uErr & X86_TRAP_PF_RW))
352 {
353 AssertRC(rc);
354 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersOutOfSync);
355 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndPhys; });
356 return rc;
357 }
358 }
359
360 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
361 * It's writing to an unhandled part of the LDT page several million times.
362 */
363 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
364 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
365 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndUnhandled; });
366 return rc;
367} /* if any kind of handler */
368
369
370/**
371 * #PF Handler for raw-mode guest execution.
372 *
373 * @returns VBox status code (appropriate for trap handling and GC return).
374 *
375 * @param pVCpu VMCPU Handle.
376 * @param uErr The trap error code.
377 * @param pRegFrame Trap register frame.
378 * @param pvFault The fault address.
379 * @param pfLockTaken PGM lock taken here or not (out)
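 *
 * @remarks Rough flow, summarised from the code below: walk the guest page
 *          tables and reflect genuine guest faults back to the guest; treat
 *          reserved-bit faults as MMIO/handler faults; set the guest A/D bits;
 *          take the PGM lock; fix dirty-bit write-protection faults; lazily
 *          SyncPT not-yet-present shadow page tables; dispatch physical and
 *          virtual access handlers; and finally SyncPage out-of-sync or
 *          write-monitored pages.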
380 */
381PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
382{
383 PVM pVM = pVCpu->CTX_SUFF(pVM);
384
385 *pfLockTaken = false;
386
387# if defined(IN_RC) && defined(VBOX_STRICT)
388 PGMDynCheckLocks(pVM);
389# endif
390
391# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
392 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
393 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
394 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
395 int rc;
396
397# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
398 /*
399 * Walk the guest page translation tables and check if it's a guest fault.
400 */
401 GSTPTWALK GstWalk;
402 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &GstWalk);
403 if (RT_FAILURE_NP(rc))
404 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
405
406 /* assert some GstWalk sanity. */
407# if PGM_GST_TYPE == PGM_TYPE_AMD64
408 AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u));
409# endif
410# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
411 AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u));
412# endif
413 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
414 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
415 Assert(GstWalk.Core.fSucceeded);
416
417 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
418 {
419 if ( ( (uErr & X86_TRAP_PF_RW)
420 && !GstWalk.Core.fEffectiveRW
421 && ( (uErr & X86_TRAP_PF_US)
422 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
423 || ((uErr & X86_TRAP_PF_US) && !GstWalk.Core.fEffectiveUS)
424 || ((uErr & X86_TRAP_PF_ID) && GstWalk.Core.fEffectiveNX)
425 )
426 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
427 }
428# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
429
430# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
431 /*
432 * If it is a reserved bit fault we know that it is an MMIO or access
433 * handler related fault and can skip the dirty page stuff below.
434 */
435 if (uErr & X86_TRAP_PF_RSVD)
436 {
437 Assert(uErr & X86_TRAP_PF_P);
438 PPGMPAGE pPage;
439/** @todo Only all physical access handlers here, so optimize further. */
440# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
441 rc = pgmPhysGetPageEx(&pVM->pgm.s, GstWalk.Core.GCPhys, &pPage);
442 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
443 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
444 &GstWalk));
445# else
446 rc = pgmPhysGetPageEx(&pVM->pgm.s, (RTGCPHYS)pvFault, &pPage);
447 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
448 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage));
449# endif
450 }
451# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
452
453# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
454 /*
455 * Set the accessed and dirty flags.
456 */
457# if PGM_GST_TYPE == PGM_TYPE_AMD64
458 GstWalk.Pml4e.u |= X86_PML4E_A;
459 GstWalk.pPml4e->u |= X86_PML4E_A;
460 GstWalk.Pdpe.u |= X86_PDPE_A;
461 GstWalk.pPdpe->u |= X86_PDPE_A;
462# endif
463 if (GstWalk.Core.fBigPage)
464 {
465 Assert(GstWalk.Pde.b.u1Size);
466 if (uErr & X86_TRAP_PF_RW)
467 {
468 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
469 GstWalk.pPde->u |= X86_PDE4M_A | X86_PDE4M_D;
470 }
471 else
472 {
473 GstWalk.Pde.u |= X86_PDE4M_A;
474 GstWalk.pPde->u |= X86_PDE4M_A;
475 }
476 }
477 else
478 {
479 Assert(!GstWalk.Pde.b.u1Size);
480 GstWalk.Pde.u |= X86_PDE_A;
481 GstWalk.pPde->u |= X86_PDE_A;
482 if (uErr & X86_TRAP_PF_RW)
483 {
484# ifdef VBOX_WITH_STATISTICS
485 if (!GstWalk.Pte.n.u1Dirty)
486 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtiedPage));
487 else
488 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageAlreadyDirty));
489# endif
490 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
491 GstWalk.pPte->u |= X86_PTE_A | X86_PTE_D;
492 }
493 else
494 {
495 GstWalk.Pte.u |= X86_PTE_A;
496 GstWalk.pPte->u |= X86_PTE_A;
497 }
498 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
499 }
500 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
501 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
502# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
503 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
504# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
505
506 /* Take the big lock now. */
507 *pfLockTaken = true;
508 pgmLock(pVM);
509
510 /*
511 * Fetch the guest PDE, PDPE and PML4E.
512 */
513# if PGM_SHW_TYPE == PGM_TYPE_32BIT
514 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
515 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
516
517# elif PGM_SHW_TYPE == PGM_TYPE_PAE
518 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
519 PX86PDPAE pPDDst;
520# if PGM_GST_TYPE == PGM_TYPE_PAE
521 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
522# else
523 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
524# endif
525 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
526
527# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
528 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
529 PX86PDPAE pPDDst;
530# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
531 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
532 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
533# else
534 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
535# endif
536 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
537
538# elif PGM_SHW_TYPE == PGM_TYPE_EPT
539 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
540 PEPTPD pPDDst;
541 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
542 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_4);
543# endif
544 Assert(pPDDst);
545
546# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
547 /*
548 * Dirty page handling.
549 *
550 * If we successfully correct the write protection fault due to dirty bit
551 * tracking, then return immediately.
552 */
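    /* Background, summarised (the tagging itself is done in SyncPage/SyncPT):
       shadow entries whose guest D bit is still clear are kept write protected
       and marked for dirty tracking (e.g. PGM_PDFLAGS_TRACK_DIRTY for big
       pages), so the first guest write faults here and CheckDirtyPageFault can
       set the guest A/D bits and restore write access without leaving the VM. */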
553 if (uErr & X86_TRAP_PF_RW) /* write fault? */
554 {
555 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyBitTracking), a);
556 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
557 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyBitTracking), a);
558 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
559 {
560 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
561 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
562 ? &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2DirtyAndAccessed
563 : &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2GuestTrap; });
564 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
565 return VINF_SUCCESS;
566 }
567 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
568 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
569 }
570
571# if 0 /* rarely useful; leave for debugging. */
572 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
573# endif
574# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
575
576 /*
577 * A common case is the not-present error caused by lazy page table syncing.
578 *
579 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
580 * here so we can safely assume that the shadow PT is present when calling
581 * SyncPage later.
582 *
583 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
584 * of mapping conflict and defer to SyncCR3 in R3.
585 * (Again, we do NOT support access handlers for non-present guest pages.)
586 *
587 */
588# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
589 Assert(GstWalk.Pde.n.u1Present);
590# endif
591 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
592 && !pPDDst->a[iPDDst].n.u1Present)
593 {
594 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2SyncPT; });
595# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
596 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
597 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
598# else
599 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
600 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
601# endif
602 if (RT_SUCCESS(rc))
603 return rc;
604 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
605 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
606 return VINF_PGM_SYNC_CR3;
607 }
608
609# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
610 /*
611 * Check if this address is within any of our mappings.
612 *
613 * This is *very* fast and it's gonna save us a bit of effort below and prevent
614 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
615 * (BTW, it's impossible to have physical access handlers in a mapping.)
616 */
617 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
618 {
619 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
620 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
621 {
622 if (pvFault < pMapping->GCPtr)
623 break;
624 if (pvFault - pMapping->GCPtr < pMapping->cb)
625 {
626 /*
627 * The first thing we check is if we've got an undetected conflict.
628 */
629 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
630 {
631 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
632 while (iPT-- > 0)
633 if (GstWalk.pPde[iPT].n.u1Present)
634 {
635 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eConflicts);
636 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
637 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
638 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
639 return VINF_PGM_SYNC_CR3;
640 }
641 }
642
643 /*
644 * Check if the fault address is in a virtual page access handler range.
645 */
646 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
647 if ( pCur
648 && pvFault - pCur->Core.Key < pCur->cb
649 && uErr & X86_TRAP_PF_RW)
650 {
651# ifdef IN_RC
652 STAM_PROFILE_START(&pCur->Stat, h);
653 pgmUnlock(pVM);
654 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
655 pgmLock(pVM);
656 STAM_PROFILE_STOP(&pCur->Stat, h);
657# else
658 AssertFailed();
659 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
660# endif
661 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersMapping);
662 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
663 return rc;
664 }
665
666 /*
667 * Pretend we're not here and let the guest handle the trap.
668 */
669 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
670 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eGuestPFMapping);
671 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
672 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Mapping; });
673 return VINF_EM_RAW_GUEST_TRAP;
674 }
675 }
676 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
677# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
678
679 /*
680 * Check if this fault address is flagged for special treatment,
681 * which means we'll have to figure out the physical address and
682 * check flags associated with it.
683 *
684 * ASSUME that we can limit any special access handling to pages
685 * in page tables which the guest believes to be present.
686 */
687# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
688 RTGCPHYS GCPhys = GstWalk.Core.GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
689# else
690 RTGCPHYS GCPhys = (RTGCPHYS)pvFault & ~(RTGCPHYS)PAGE_OFFSET_MASK;
691# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
692 PPGMPAGE pPage;
693 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
694 if (RT_FAILURE(rc))
695 {
696 /*
697 * When the guest accesses invalid physical memory (e.g. probing
698 * of RAM or accessing a remapped MMIO range), then we'll fall
699 * back to the recompiler to emulate the instruction.
700 */
701 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
702 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eHandlersInvalid);
703 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2InvalidPhys; });
704 return VINF_EM_RAW_EMULATE_INSTR;
705 }
706
707 /*
708 * Any handlers for this page?
709 */
710 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
711# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
712 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, &GstWalk));
713# else
714 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage));
715# endif
716
717 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTimeOutOfSync, c);
718
719# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
720 if (uErr & X86_TRAP_PF_P)
721 {
722 /*
723 * The page isn't marked, but it might still be monitored by a virtual page access handler.
724 * (ASSUMES no temporary disabling of virtual handlers.)
725 */
726 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
727 * we should correct both the shadow page table and physical memory flags, and not only check for
728 * accesses within the handler region but for access to pages with virtual handlers. */
729 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
730 if (pCur)
731 {
732 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
733 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
734 || !(uErr & X86_TRAP_PF_P)
735 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
736 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
737
738 if ( pvFault - pCur->Core.Key < pCur->cb
739 && ( uErr & X86_TRAP_PF_RW
740 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
741 {
742# ifdef IN_RC
743 STAM_PROFILE_START(&pCur->Stat, h);
744 pgmUnlock(pVM);
745 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
746 pgmLock(pVM);
747 STAM_PROFILE_STOP(&pCur->Stat, h);
748# else
749 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
750# endif
751 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2HndVirt; });
752 return rc;
753 }
754 }
755 }
756# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
757
758 /*
759 * We are here only if page is present in Guest page tables and
760 * trap is not handled by our handlers.
761 *
762 * Check it for page out-of-sync situation.
763 */
764 if (!(uErr & X86_TRAP_PF_P))
765 {
766 /*
767 * Page is not present in our page tables. Try to sync it!
768 */
769 if (uErr & X86_TRAP_PF_US)
770 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUser));
771 else /* supervisor */
772 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
773
774 if (PGM_PAGE_IS_BALLOONED(pPage))
775 {
776 /* Emulate reads from ballooned pages as they are not present in
777 our shadow page tables. (Required for e.g. Solaris guests; soft
778 ecc, random nr generator.) */
779 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
780 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
781 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncBallloon));
782 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Ballooned; });
783 return rc;
784 }
785
786# if defined(LOG_ENABLED) && !defined(IN_RING0)
787 RTGCPHYS GCPhys2;
788 uint64_t fPageGst2;
789 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
790# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
791 Log(("Page out of sync: %RGv eip=%08x PdeSrc.US=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
792 pvFault, pRegFrame->eip, GstWalk.Pde.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
793# else
794 Log(("Page out of sync: %RGv eip=%08x fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
795 pvFault, pRegFrame->eip, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
796# endif
797# endif /* LOG_ENABLED */
798
799# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
800 if ( !GstWalk.Core.fEffectiveUS
801 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
802 {
803 /* Note: Can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU. */
804 if ( pvFault == (RTGCPTR)pRegFrame->eip
805 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
806# ifdef CSAM_DETECT_NEW_CODE_PAGES
807 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
808 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
809# endif /* CSAM_DETECT_NEW_CODE_PAGES */
810 )
811 {
812 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
813 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
814 if (rc != VINF_SUCCESS)
815 {
816 /*
817 * CSAM needs to perform a job in ring 3.
818 *
819 * Sync the page before going to the host context; otherwise we'll end up in a loop if
820 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
821 */
822 LogFlow(("CSAM ring 3 job\n"));
823 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
824 AssertRC(rc2);
825
826 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2CSAM; });
827 return rc;
828 }
829 }
830# ifdef CSAM_DETECT_NEW_CODE_PAGES
831 else if ( uErr == X86_TRAP_PF_RW
832 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
833 && pRegFrame->ecx < 0x10000)
834 {
835 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
836 * to detect loading of new code pages.
837 */
838
839 /*
840 * Decode the instruction.
841 */
842 RTGCPTR PC;
843 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
844 &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
845 if (rc == VINF_SUCCESS)
846 {
847 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
848 uint32_t cbOp;
849 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
850
851 /* For now we'll restrict this to rep movsw/d instructions */
852 if ( rc == VINF_SUCCESS
853 && pDis->pCurInstr->opcode == OP_MOVSWD
854 && (pDis->prefix & PREFIX_REP))
855 {
856 CSAMMarkPossibleCodePage(pVM, pvFault);
857 }
858 }
859 }
860# endif /* CSAM_DETECT_NEW_CODE_PAGES */
861
862 /*
863 * Mark this page as safe.
864 */
865 /** @todo not correct for pages that contain both code and data!! */
866 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
867 CSAMMarkPage(pVM, pvFault, true);
868 }
869# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
870# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
871 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
872# else
873 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
874# endif
875 if (RT_SUCCESS(rc))
876 {
877 /* The page was successfully synced, return to the guest. */
878 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSync; });
879 return VINF_SUCCESS;
880 }
881 }
882 else /* uErr & X86_TRAP_PF_P: */
883 {
884 /*
885 * Write protected pages are made writable when the guest makes the
886 * first write to it. This happens for pages that are shared, write
887 * monitored or not yet allocated.
888 *
889 * We may also end up here when CR0.WP=0 in the guest.
890 *
891 * Also, a side effect of not flushing global PDEs is out-of-sync
892 * pages due to physically monitored regions that are no longer valid.
893 * Assume for now it only applies to the read/write flag.
894 */
895 if (uErr & X86_TRAP_PF_RW)
896 {
897 /*
898 * Check if it is a read-only page.
899 */
900 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
901 {
902 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
903 Assert(!PGM_PAGE_IS_ZERO(pPage));
904 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
905 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2MakeWritable; });
906
907 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
908 if (rc != VINF_SUCCESS)
909 {
910 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
911 return rc;
912 }
913 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
914 return VINF_EM_NO_MEMORY;
915 }
916
917# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
918 /*
919 * Check to see if we need to emulate the instruction if CR0.WP=0.
920 */
921 if ( !GstWalk.Core.fEffectiveRW
922 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
923 && CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
924 {
925 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
926 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
927 if (RT_SUCCESS(rc))
928 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eWPEmulInRZ);
929 else
930 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eWPEmulToR3);
931 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2WPEmulation; });
932 return rc;
933 }
934# endif
935 /// @todo count the above case; else
936 if (uErr & X86_TRAP_PF_US)
937 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
938 else /* supervisor */
939 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
940
941 /*
942 * Sync the page.
943 *
944 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
945 * page is not present, which is not true in this case.
946 */
947# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
948 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
949# else
950 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
951# endif
952 if (RT_SUCCESS(rc))
953 {
954 /*
955 * Page was successfully synced, return to guest but invalidate
956 * the TLB first as the page is very likely to be in it.
957 */
958# if PGM_SHW_TYPE == PGM_TYPE_EPT
959 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
960# else
961 PGM_INVL_PG(pVCpu, pvFault);
962# endif
963# ifdef VBOX_STRICT
964 RTGCPHYS GCPhys2;
965 uint64_t fPageGst;
966 if (!pVM->pgm.s.fNestedPaging)
967 {
968 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
969 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%Rrc fPageGst=%RX64\n", rc, fPageGst));
970 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
971 }
972 uint64_t fPageShw;
973 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
974 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
975# endif /* VBOX_STRICT */
976 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2OutOfSyncHndObs; });
977 return VINF_SUCCESS;
978 }
979 }
980 /** @todo else: WTF are we here? */
981
982# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
983 /*
984 * Check for VMM page flags vs. Guest page flags consistency.
985 * Currently only for debug purposes.
986 */
987 if (RT_SUCCESS(rc))
988 {
989 /* Get guest page flags. */
990 uint64_t fPageGst;
991 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
992 if (RT_SUCCESS(rc))
993 {
994 uint64_t fPageShw;
995 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
996
997 /*
998 * Compare page flags.
999 * Note: we have AVL, A, D bits desynched.
1000 */
1001 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
1002 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
1003 }
1004 else
1005 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
1006 }
1007 else
1008 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
1009# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
1010 }
1011
1012 /** @todo This point is only ever reached when something goes awry. The
1013 * conclusion here is wrong, it is not a guest trap! Will fix in
1014 * a bit... */
1015
1016# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1017 /*
1018 * Conclusion, this is a guest trap.
1019 */
1020 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
1021 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eGuestPFUnh);
1022 return VINF_EM_RAW_GUEST_TRAP;
1023# else
1024 /* present, but not a monitored page; perhaps the guest is probing physical memory */
1025 return VINF_EM_RAW_EMULATE_INSTR;
1026# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
1027
1028
1029# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1030
1031 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1032 return VERR_INTERNAL_ERROR;
1033# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1034}
1035#endif /* !IN_RING3 */
1036
1037
1038/**
1039 * Emulation of the invlpg instruction.
1040 *
1041 *
1042 * @returns VBox status code.
1043 *
1044 * @param pVCpu The VMCPU handle.
1045 * @param GCPtrPage Page to invalidate.
1046 *
1047 * @remark ASSUMES that the guest is updating before invalidating. This order
1048 * isn't required by the CPU, so this is speculative and could cause
1049 * trouble.
1050 * @remark No TLB shootdown is done on any other VCPU as we assume that
1051 * invlpg emulation is the *only* reason for calling this function.
1052 * (The guest has to shoot down TLB entries on other CPUs itself)
1053 * Currently true, but keep in mind!
1054 *
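 * @remarks Illustrative call path (an assumption, not spelled out in this
 *          file): invlpg emulation reaches this worker via PGMInvalidatePage(),
 *          which is expected to take the PGM lock first (hence the
 *          PGMIsLockOwner assertion below).
 *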
1055 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1056 */
1057PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1058{
1059#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1060 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1061 && PGM_SHW_TYPE != PGM_TYPE_EPT
1062 int rc;
1063 PVM pVM = pVCpu->CTX_SUFF(pVM);
1064 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1065
1066 Assert(PGMIsLockOwner(pVM));
1067
1068 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1069
1070# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1071 if (pPool->cDirtyPages)
1072 pgmPoolResetDirtyPages(pVM);
1073# endif
1074
1075 /*
1076 * Get the shadow PD entry and skip out if this PD isn't present.
1077 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1078 */
1079# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1080 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1081 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1082
1083 /* Fetch the pgm pool shadow descriptor. */
1084 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1085 Assert(pShwPde);
1086
1087# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1088 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1089 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1090
1091 /* If the shadow PDPE isn't present, then skip the invalidate. */
1092 if (!pPdptDst->a[iPdpt].n.u1Present)
1093 {
1094 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1095 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1096 return VINF_SUCCESS;
1097 }
1098
1099 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1100 PPGMPOOLPAGE pShwPde = NULL;
1101 PX86PDPAE pPDDst;
1102
1103 /* Fetch the pgm pool shadow descriptor. */
1104 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1105 AssertRCSuccessReturn(rc, rc);
1106 Assert(pShwPde);
1107
1108 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1109 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1110
1111# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1112 /* PML4 */
1113 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1114 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1115 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1116 PX86PDPAE pPDDst;
1117 PX86PDPT pPdptDst;
1118 PX86PML4E pPml4eDst;
1119 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1120 if (rc != VINF_SUCCESS)
1121 {
1122 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1123 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1124 return VINF_SUCCESS;
1125 }
1126 Assert(pPDDst);
1127
1128 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1129 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1130
1131 if (!pPdpeDst->n.u1Present)
1132 {
1133 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1134 return VINF_SUCCESS;
1135 }
1136
1137 /* Fetch the pgm pool shadow descriptor. */
1138 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1139 Assert(pShwPde);
1140
1141# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1142
1143 const SHWPDE PdeDst = *pPdeDst;
1144 if (!PdeDst.n.u1Present)
1145 {
1146 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1147 return VINF_SUCCESS;
1148 }
1149
1150# if defined(IN_RC)
1151 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1152 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1153# endif
1154
1155 /*
1156 * Get the guest PD entry and calc big page.
1157 */
1158# if PGM_GST_TYPE == PGM_TYPE_32BIT
1159 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1160 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1161 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1162# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1163 unsigned iPDSrc = 0;
1164# if PGM_GST_TYPE == PGM_TYPE_PAE
1165 X86PDPE PdpeSrcIgn;
1166 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1167# else /* AMD64 */
1168 PX86PML4E pPml4eSrcIgn;
1169 X86PDPE PdpeSrcIgn;
1170 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1171# endif
1172 GSTPDE PdeSrc;
1173
1174 if (pPDSrc)
1175 PdeSrc = pPDSrc->a[iPDSrc];
1176 else
1177 PdeSrc.u = 0;
1178# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1179 const bool fIsBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1180
1181# ifdef IN_RING3
1182 /*
1183 * If a CR3 Sync is pending we may ignore the invalidate page operation
1184 * depending on the kind of sync and if it's a global page or not.
1185 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1186 */
1187# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1188 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1189 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1190 && fIsBigPage
1191 && PdeSrc.b.u1Global
1192 )
1193 )
1194# else
1195 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1196# endif
1197 {
1198 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePageSkipped));
1199 return VINF_SUCCESS;
1200 }
1201# endif /* IN_RING3 */
1202
1203 /*
1204 * Deal with the Guest PDE.
1205 */
1206 rc = VINF_SUCCESS;
1207 if (PdeSrc.n.u1Present)
1208 {
1209 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1210 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1211# ifndef PGM_WITHOUT_MAPPING
1212 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1213 {
1214 /*
1215 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1216 */
1217 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1218 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1219 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1220 }
1221 else
1222# endif /* !PGM_WITHOUT_MAPPING */
1223 if (!fIsBigPage)
1224 {
1225 /*
1226 * 4KB - page.
1227 */
1228 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1229 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1230
1231# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1232 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1233 if (pShwPage->cModifications)
1234 pShwPage->cModifications = 1;
1235# endif
1236
1237# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1238 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1239 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1240# endif
1241 if (pShwPage->GCPhys == GCPhys)
1242 {
1243# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1244 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1245 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1246 if (pPT->a[iPTEDst].n.u1Present)
1247 {
1248 /* This is very unlikely with caching/monitoring enabled. */
1249 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK, iPTEDst);
1250 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1251 }
1252# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1253 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1254 if (RT_SUCCESS(rc))
1255 rc = VINF_SUCCESS;
1256# endif
1257 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4KBPages));
1258 PGM_INVL_PG(pVCpu, GCPtrPage);
1259 }
1260 else
1261 {
1262 /*
1263 * The page table address changed.
1264 */
1265 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1266 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1267 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1268 ASMAtomicWriteSize(pPdeDst, 0);
1269 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1270 PGM_INVL_VCPU_TLBS(pVCpu);
1271 }
1272 }
1273 else
1274 {
1275 /*
1276 * 2/4MB - page.
1277 */
1278 /* Before freeing the page, check if anything really changed. */
1279 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1280 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1281# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1282 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1283 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1284# endif
1285 if ( pShwPage->GCPhys == GCPhys
1286 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1287 {
1288 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1289 /** @todo PAT */
1290 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1291 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1292 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1293 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1294 {
1295 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1296 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1297# if defined(IN_RC)
1298 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1299 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1300# endif
1301 return VINF_SUCCESS;
1302 }
1303 }
1304
1305 /*
1306 * Ok, the page table is present and it's been changed in the guest.
1307 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1308 * We could do this for some flushes in GC too, but we need an algorithm for
1309 * deciding which 4MB pages contain code that is likely to be executed very soon.
1310 */
1311 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1312 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1313 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1314 ASMAtomicWriteSize(pPdeDst, 0);
1315 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePage4MBPages));
1316 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1317 }
1318 }
1319 else
1320 {
1321 /*
1322 * Page directory is not present, mark shadow PDE not present.
1323 */
1324 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1325 {
1326 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1327 ASMAtomicWriteSize(pPdeDst, 0);
1328 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDNPs));
1329 PGM_INVL_PG(pVCpu, GCPtrPage);
1330 }
1331 else
1332 {
1333 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1334 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,InvalidatePagePDMappings));
1335 }
1336 }
1337# if defined(IN_RC)
1338 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1339 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1340# endif
1341 return rc;
1342
1343#else /* guest real and protected mode */
1344 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1345 return VINF_SUCCESS;
1346#endif
1347}
1348
1349
1350/**
1351 * Update the tracking of shadowed pages.
1352 *
1353 * @param pVCpu The VMCPU handle.
1354 * @param pShwPage The shadow page.
1355 * @param HCPhys The physical page that is being dereferenced.
1356 * @param iPte Shadow PTE index
1357 */
1358DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte)
1359{
1360 PVM pVM = pVCpu->CTX_SUFF(pVM);
1361
1362 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackDeref, a);
1363 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1364
1365 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1366 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1367 * 2. write protect all shadowed pages. I.e. implement caching.
1368 */
1369 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1370
1371 /*
1372 * Find the guest address.
1373 */
1374 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1375 pRam;
1376 pRam = pRam->CTX_SUFF(pNext))
1377 {
1378 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1379 while (iPage-- > 0)
1380 {
1381 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1382 {
1383 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1384
1385 Assert(pShwPage->cPresent);
1386 Assert(pPool->cPresent);
1387 pShwPage->cPresent--;
1388 pPool->cPresent--;
1389
1390 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1391 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackDeref, a);
1392 return;
1393 }
1394 }
1395 }
1396
1397 for (;;)
1398 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1399}
1400
1401
1402/**
1403 * Update the tracking of shadowed pages.
1404 *
1405 * @param pVCpu The VMCPU handle.
1406 * @param pShwPage The shadow page.
1407 * @param u16 The top 16 bits of pPage->HCPhys.
1408 * @param pPage Pointer to the guest page. This will be modified.
1409 * @param iPTDst The index into the shadow table.
1410 */
1411DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1412{
1413 PVM pVM = pVCpu->CTX_SUFF(pVM);
1414 /*
1415 * Just deal with the simple first time here.
1416 */
1417 if (!u16)
1418 {
1419 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackVirgin);
1420 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1421 /* Save the page table index. */
1422 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1423 }
1424 else
1425 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
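 /* (Rough sketch of the tracking scheme used here: the 16-bit tracking word
  *  either packs a small reference count together with the owning shadow pool
  *  page index (PGMPOOL_TD_MAKE above), or, once the page is referenced by more
  *  than one shadow PTE, pgmPoolTrackPhysExtAddref presumably moves it over to
  *  an external reference list and returns the new tracking value.) */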
1426
1427 /* write back */
1428 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1429 PGM_PAGE_SET_TRACKING(pPage, u16);
1430
1431 /* update statistics. */
1432 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1433 pShwPage->cPresent++;
1434 if (pShwPage->iFirstPresent > iPTDst)
1435 pShwPage->iFirstPresent = iPTDst;
1436}
1437
1438
1439/**
1440 * Modifies a shadow PTE to account for access handlers.
1441 *
1442 * @param pVM The VM handle.
1443 * @param pPage The page in question.
1444 * @param fPteSrc The flags of the source PTE.
1445 * @param pPteDst The shadow PTE (output).
1446 */
1447DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVM pVM, PCPGMPAGE pPage, uint32_t fPteSrc, PSHWPTE pPteDst)
1448{
1449 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1450 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
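 /* (Summary of the three outcomes below: if only some handlers are active the
  *  page is mapped but without write access, so writes still trap into the
  *  handler; with the MMIO optimizations an intentionally bogus PTE is written
  *  so the access faults in a way the #PF/EPT-misconfig path can recognise;
  *  otherwise, with all-covering handlers, the entry is left not present.) */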
1451 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1452 {
1453#if PGM_SHW_TYPE == PGM_TYPE_EPT
1454 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage);
1455 pPteDst->n.u1Present = 1;
1456 pPteDst->n.u1Execute = 1;
1457 pPteDst->n.u1IgnorePAT = 1;
1458 pPteDst->n.u3EMT = VMX_EPT_MEMTYPE_WB;
1459 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1460#else
1461 pPteDst->u = (fPteSrc & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1462 | PGM_PAGE_GET_HCPHYS(pPage);
1463#endif
1464 }
1465#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1466# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1467 else if ( PGM_PAGE_IS_MMIO(pPage)
1468# if PGM_SHW_TYPE != PGM_TYPE_EPT
1469 && ( (fPteSrc & (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/ | X86_PTE_US )) /* #PF handles D & A first. */
1470 == (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/)
1471 || BTH_IS_NP_ACTIVE(pVM) )
1472# endif
1473# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1474 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1475# endif
1476 )
1477 {
1478 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1479# if PGM_SHW_TYPE == PGM_TYPE_EPT
1480 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1481 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg;
1482 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1483 pPteDst->n.u1Present = 0;
1484 pPteDst->n.u1Write = 1;
1485 pPteDst->n.u1Execute = 0;
1486 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1487 pPteDst->n.u3EMT = 7;
1488# else
1489 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1490 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P;
1491# endif
1492 }
1493# endif
1494#endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1495 else
1496 {
1497 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1498 pPteDst->u = 0;
1499 }
1500 /** @todo count these kinds of entries. */
1501}
1502
1503
1504/**
1505 * Creates a 4K shadow page for a guest page.
1506 *
1507 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1508 * physical address. Of the PdeSrc argument only the flags are used. No page
1509 * structures will be mapped in this function.
1510 *
1511 * @param pVCpu The VMCPU handle.
1512 * @param pPteDst Destination page table entry.
1513 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1514 * Can safely assume that only the flags are being used.
1515 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1516 * @param pShwPage Pointer to the shadow page.
1517 * @param iPTDst The index into the shadow table.
1518 *
1519 * @remark Not used for 2/4MB pages!
1520 */
1521DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1522 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1523{
1524 if ( PteSrc.n.u1Present
1525 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1526 {
1527 PVM pVM = pVCpu->CTX_SUFF(pVM);
1528
1529# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1530 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1531 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1532 if (pShwPage->fDirty)
1533 {
1534 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1535 PX86PTPAE pGstPT;
1536
1537 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1538 pGstPT->a[iPTDst].u = PteSrc.u;
1539 }
1540# endif
1541 /*
1542 * Find the ram range.
1543 */
1544 PPGMPAGE pPage;
1545 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1546 if (RT_SUCCESS(rc))
1547 {
1548 /* Ignore ballooned pages.
1549 Don't return errors or use a fatal assert here as part of a
1550 shadow sync range might include ballooned pages. */
1551 if (PGM_PAGE_IS_BALLOONED(pPage))
1552 {
1553 Assert(!pPteDst->n.u1Present); /** @todo user tracking needs updating if this triggers. */
1554 return;
1555 }
1556
1557#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1558 /* Make the page writable if necessary. */
1559 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1560 && ( PGM_PAGE_IS_ZERO(pPage)
1561 || ( PteSrc.n.u1Write
1562 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1563# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1564 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1565# endif
1566# ifdef VBOX_WITH_PAGE_SHARING
1567 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1568# endif
1569 )
1570 )
1571 )
1572 {
1573 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1574 AssertRC(rc);
1575 }
1576#endif
1577
1578 /*
1579 * Make page table entry.
1580 */
1581 SHWPTE PteDst;
1582 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1583 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1584 PteSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT
1585 | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW),
1586 &PteDst);
1587 else
1588 {
1589#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1590 /*
1591 * If the page or page directory entry is not marked accessed,
1592 * we mark the page not present.
1593 */
1594 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1595 {
1596 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1597 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,AccessedPage));
1598 PteDst.u = 0;
1599 }
1600 /*
1601 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1602 * when the page is modified.
1603 */
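 /* (This is the core of the dirty-bit emulation: the shadow PTE gets the RW bit
  *  stripped and PGM_PTFLAGS_TRACK_DIRTY set, so the first guest write faults
  *  again and the dirty-bit fault path (CheckDirtyPageFault further down) can
  *  note the write and restore write access before restarting the instruction.) */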
1604 else if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1605 {
1606 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPage));
1607 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1608 | PGM_PAGE_GET_HCPHYS(pPage)
1609 | PGM_PTFLAGS_TRACK_DIRTY;
1610 }
1611 else
1612#endif
1613 {
1614 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageSkipped));
1615#if PGM_SHW_TYPE == PGM_TYPE_EPT
1616 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1617 PteDst.n.u1Present = 1;
1618 PteDst.n.u1Write = 1;
1619 PteDst.n.u1Execute = 1;
1620 PteDst.n.u1IgnorePAT = 1;
1621 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1622 /* PteDst.n.u1Size = 0 */
1623#else
1624 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1625 | PGM_PAGE_GET_HCPHYS(pPage);
1626#endif
1627 }
1628
1629 /*
1630 * Make sure only allocated pages are mapped writable.
1631 */
1632 if ( PteDst.n.u1Write
1633 && PteDst.n.u1Present
1634 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1635 {
1636 /* Still applies to shared pages. */
1637 Assert(!PGM_PAGE_IS_ZERO(pPage));
1638 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. Why isn't it? */
1639 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1640 }
1641 }
1642
1643 /*
1644 * Keep user track up to date.
1645 */
1646 if (PteDst.n.u1Present)
1647 {
1648 if (!pPteDst->n.u1Present)
1649 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1650 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1651 {
1652 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1653 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1654 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1655 }
1656 }
1657 else if (pPteDst->n.u1Present)
1658 {
1659 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1660 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1661 }
1662
1663 /*
1664 * Update statistics and commit the entry.
1665 */
1666#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1667 if (!PteSrc.n.u1Global)
1668 pShwPage->fSeenNonGlobal = true;
1669#endif
1670 ASMAtomicWriteSize(pPteDst, PteDst.u);
1671 return;
1672 }
1673
1674/** @todo count these three different kinds. */
1675 Log2(("SyncPageWorker: invalid address in Pte\n"));
1676 }
1677 else if (!PteSrc.n.u1Present)
1678 Log2(("SyncPageWorker: page not present in Pte\n"));
1679 else
1680 Log2(("SyncPageWorker: invalid Pte\n"));
1681
1682 /*
1683 * The page is not present or the PTE is bad. Replace the shadow PTE by
1684 * an empty entry, making sure to keep the user tracking up to date.
1685 */
1686 if (pPteDst->n.u1Present)
1687 {
1688 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1689 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1690 }
1691 ASMAtomicWriteSize(pPteDst, 0);
1692}
1693
1694
1695/**
1696 * Syncs a guest OS page.
1697 *
1698 * There are no conflicts at this point, neither is there any need for
1699 * page table allocations.
1700 *
1701 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1702 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1703 *
1704 * @returns VBox status code.
1705 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1706 * @param pVCpu The VMCPU handle.
1707 * @param PdeSrc Page directory entry of the guest.
1708 * @param GCPtrPage Guest context page address.
1709 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1710 * @param uErr Fault error (X86_TRAP_PF_*).
1711 */
1712PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1713{
1714 PVM pVM = pVCpu->CTX_SUFF(pVM);
1715 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1716 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1717
1718 Assert(PGMIsLockOwner(pVM));
1719
1720#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1721 || PGM_GST_TYPE == PGM_TYPE_PAE \
1722 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1723 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1724 && PGM_SHW_TYPE != PGM_TYPE_EPT
1725
1726 /*
1727 * Assert preconditions.
1728 */
1729 Assert(PdeSrc.n.u1Present);
1730 Assert(cPages);
1731# if 0 /* rarely useful; leave for debugging. */
1732 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1733# endif
1734
1735 /*
1736 * Get the shadow PDE, find the shadow page table in the pool.
1737 */
1738# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1739 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1740 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1741
1742 /* Fetch the pgm pool shadow descriptor. */
1743 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1744 Assert(pShwPde);
1745
1746# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1747 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1748 PPGMPOOLPAGE pShwPde = NULL;
1749 PX86PDPAE pPDDst;
1750
1751 /* Fetch the pgm pool shadow descriptor. */
1752 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1753 AssertRCSuccessReturn(rc2, rc2);
1754 Assert(pShwPde);
1755
1756 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1757 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1758
1759# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1760 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1761 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1762 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1763 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1764
1765 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1766 AssertRCSuccessReturn(rc2, rc2);
1767 Assert(pPDDst && pPdptDst);
1768 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1769# endif
1770 SHWPDE PdeDst = *pPdeDst;
1771
1772 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1773 if (!PdeDst.n.u1Present)
1774 {
1775 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1776 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1777 return VINF_SUCCESS; /* force the instruction to be executed again. */
1778 }
1779
1780 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1781 Assert(pShwPage);
1782
1783# if PGM_GST_TYPE == PGM_TYPE_AMD64
1784 /* Fetch the pgm pool shadow descriptor. */
1785 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1786 Assert(pShwPde);
1787# endif
1788
1789# if defined(IN_RC)
1790 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1791 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1792# endif
1793
1794 /*
1795 * Check that the page is present and that the shadow PDE isn't out of sync.
1796 */
1797 const bool fBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1798 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1799 RTGCPHYS GCPhys;
1800 if (!fBigPage)
1801 {
1802 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1803# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1804 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1805 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1806# endif
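 /* (Background for the adjustment above: a 32-bit guest page table is 4 KB
  *  holding 1024 4-byte entries, while a PAE shadow page table only holds
  *  512 8-byte entries, so each guest PT is shadowed by a pair of PAE PTs;
  *  the low bit of iPDDst picks which 2 KB half of the guest PT this shadow
  *  PT corresponds to, hence the PAGE_SIZE / 2 offset on GCPhys.) */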
1807 }
1808 else
1809 {
1810 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1811# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1812 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1813 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1814# endif
1815 }
1816 if ( fPdeValid
1817 && pShwPage->GCPhys == GCPhys
1818 && PdeSrc.n.u1Present
1819 && PdeSrc.n.u1User == PdeDst.n.u1User
1820 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1821# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1822 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !GST_IS_NX_ACTIVE(pVCpu))
1823# endif
1824 )
1825 {
1826 /*
1827 * Check that the PDE is marked accessed already.
1828 * Since we set the accessed bit *before* getting here on a #PF, this
1829 * check is only meant for dealing with non-#PF'ing paths.
1830 */
1831 if (PdeSrc.n.u1Accessed)
1832 {
1833 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1834 if (!fBigPage)
1835 {
1836 /*
1837 * 4KB Page - Map the guest page table.
1838 */
1839 PGSTPT pPTSrc;
1840 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1841 if (RT_SUCCESS(rc))
1842 {
1843# ifdef PGM_SYNC_N_PAGES
1844 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1845 if ( cPages > 1
1846 && !(uErr & X86_TRAP_PF_P)
1847 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1848 {
1849 /*
1850 * This code path is currently only taken when the caller is PGMTrap0eHandler
1851 * for non-present pages!
1852 *
1853 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1854 * deal with locality.
1855 */
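 /* (Illustration, assuming PGM_SYNC_NR_PAGES were e.g. 8: a fault on shadow PTE
  *  index 100 would sync entries 96..103 around it, while a fault on index 3
  *  clamps the start and syncs entries 0..6.) */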
1856 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1857# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1858 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1859 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1860# else
1861 const unsigned offPTSrc = 0;
1862# endif
1863 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1864 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1865 iPTDst = 0;
1866 else
1867 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1868 for (; iPTDst < iPTDstEnd; iPTDst++)
1869 {
1870 if (!pPTDst->a[iPTDst].n.u1Present)
1871 {
1872 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1873 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1874 NOREF(GCPtrCurPage);
1875#ifndef IN_RING0
1876 /*
1877 * Assuming kernel code will be marked as supervisor - and not as user level
1878 * and executed using a conforming code selector - and marked as read-only.
1879 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1880 */
1881 PPGMPAGE pPage;
1882 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1883 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1884 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1885 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1886 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1887 )
1888#endif /* else: CSAM not active */
1889 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1890 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1891 GCPtrCurPage, PteSrc.n.u1Present,
1892 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1893 PteSrc.n.u1User & PdeSrc.n.u1User,
1894 (uint64_t)PteSrc.u,
1895 (uint64_t)pPTDst->a[iPTDst].u,
1896 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1897 }
1898 }
1899 }
1900 else
1901# endif /* PGM_SYNC_N_PAGES */
1902 {
1903 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1904 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1905 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1906 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1907 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1908 GCPtrPage, PteSrc.n.u1Present,
1909 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1910 PteSrc.n.u1User & PdeSrc.n.u1User,
1911 (uint64_t)PteSrc.u,
1912 (uint64_t)pPTDst->a[iPTDst].u,
1913 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1914 }
1915 }
1916 else /* MMIO or invalid page: emulated in #PF handler. */
1917 {
1918 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1919 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1920 }
1921 }
1922 else
1923 {
1924 /*
1925 * 4/2MB page - lazy syncing shadow 4K pages.
1926 * (There are many causes of getting here, it's no longer only CSAM.)
1927 */
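 /* (Lazy here means: instead of filling the whole shadow PT for the big page,
  *  only the single 4K shadow PTE covering GCPtrPage is produced, using the big
  *  page base address plus the offset of GCPtrPage within it.) */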
1928 /* Calculate the GC physical address of this 4KB shadow page. */
1929 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1930 /* Find ram range. */
1931 PPGMPAGE pPage;
1932 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1933 if (RT_SUCCESS(rc))
1934 {
1935 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1936
1937# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1938 /* Try to make the page writable if necessary. */
1939 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1940 && ( PGM_PAGE_IS_ZERO(pPage)
1941 || ( PdeSrc.n.u1Write
1942 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1943# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1944 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1945# endif
1946# ifdef VBOX_WITH_PAGE_SHARING
1947 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1948# endif
1949 )
1950 )
1951 )
1952 {
1953 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1954 AssertRC(rc);
1955 }
1956# endif
1957
1958 /*
1959 * Make shadow PTE entry.
1960 */
1961 SHWPTE PteDst;
1962 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1963 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1964 PdeSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK
1965 | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT),
1966 &PteDst);
1967 else
1968 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1969 | PGM_PAGE_GET_HCPHYS(pPage);
1970
1971 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1972 if ( PteDst.n.u1Present
1973 && !pPTDst->a[iPTDst].n.u1Present)
1974 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1975
1976 /* Make sure only allocated pages are mapped writable. */
1977 if ( PteDst.n.u1Write
1978 && PteDst.n.u1Present
1979 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1980 {
1981 /* Still applies to shared pages. */
1982 Assert(!PGM_PAGE_IS_ZERO(pPage));
1983 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1984 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1985 }
1986
1987 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1988
1989 /*
1990 * If the page is not flagged as dirty and is writable, then make it read-only
1991 * at PD level, so we can set the dirty bit when the page is modified.
1992 *
1993 * ASSUMES that page access handlers are implemented on page table entry level.
1994 * Thus we will first catch the dirty access and set PDE.D and restart. If
1995 * there is an access handler, we'll trap again and let it work on the problem.
1996 */
1997 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1998 * As for invlpg, it simply frees the whole shadow PT.
1999 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2000 if ( !PdeSrc.b.u1Dirty
2001 && PdeSrc.b.u1Write)
2002 {
2003 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2004 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2005 PdeDst.n.u1Write = 0;
2006 }
2007 else
2008 {
2009 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2010 PdeDst.n.u1Write = PdeSrc.n.u1Write;
2011 }
2012 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2013 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2014 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
2015 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2016 }
2017 else
2018 {
2019 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2020 /** @todo must wipe the shadow page table in this case. */
2021 }
2022 }
2023# if defined(IN_RC)
2024 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2025 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2026# endif
2027 return VINF_SUCCESS;
2028 }
2029
2030 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPagePDNAs));
2031 }
2032 else if (fPdeValid)
2033 {
2034 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2035 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2036 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2037 }
2038 else
2039 {
2040/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2041 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2042 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2043 }
2044
2045 /*
2046 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2047 * Yea, I'm lazy.
2048 */
2049 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2050 ASMAtomicWriteSize(pPdeDst, 0);
2051
2052# if defined(IN_RC)
2053 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2054 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2055# endif
2056 PGM_INVL_VCPU_TLBS(pVCpu);
2057 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2058
2059
2060#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2061 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2062 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2063 && !defined(IN_RC)
2064
2065# ifdef PGM_SYNC_N_PAGES
2066 /*
2067 * Get the shadow PDE, find the shadow page table in the pool.
2068 */
2069# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2070 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
2071
2072# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2073 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
2074
2075# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2076 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2077 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2078 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2079 X86PDEPAE PdeDst;
2080 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2081
2082 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2083 AssertRCSuccessReturn(rc, rc);
2084 Assert(pPDDst && pPdptDst);
2085 PdeDst = pPDDst->a[iPDDst];
2086# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2087 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2088 PEPTPD pPDDst;
2089 EPTPDE PdeDst;
2090
2091 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2092 if (rc != VINF_SUCCESS)
2093 {
2094 AssertRC(rc);
2095 return rc;
2096 }
2097 Assert(pPDDst);
2098 PdeDst = pPDDst->a[iPDDst];
2099# endif
2100 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2101 if (!PdeDst.n.u1Present)
2102 {
2103 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2104 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2105 return VINF_SUCCESS; /* force the instruction to be executed again. */
2106 }
2107
2108 /* Can happen in the guest SMP case; another VCPU activated this PDE while we were blocking to handle the page fault. */
2109 if (PdeDst.n.u1Size)
2110 {
2111 Assert(pVM->pgm.s.fNestedPaging);
2112 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2113 return VINF_SUCCESS;
2114 }
2115
2116 /* Mask away the page offset. */
2117 GCPtrPage &= ~((RTGCPTR)0xfff);
2118
2119 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2120 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2121
2122 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2123 if ( cPages > 1
2124 && !(uErr & X86_TRAP_PF_P)
2125 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2126 {
2127 /*
2128 * This code path is currently only taken when the caller is PGMTrap0eHandler
2129 * for non-present pages!
2130 *
2131 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2132 * deal with locality.
2133 */
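 /* (There is no guest page table in these modes, so a guest PTE is faked: the
  *  page address is simply GCPtrCurPage (1:1 guest-physical mapping) and the
  *  P/RW/US/A/D bits are all set, after which the normal SyncPageWorker path
  *  is reused.) */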
2134 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2135 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2136 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2137 iPTDst = 0;
2138 else
2139 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2140 for (; iPTDst < iPTDstEnd; iPTDst++)
2141 {
2142 if (!pPTDst->a[iPTDst].n.u1Present)
2143 {
2144 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2145 GSTPTE PteSrc;
2146
2147 /* Fake the page table entry */
2148 PteSrc.u = GCPtrCurPage;
2149 PteSrc.n.u1Present = 1;
2150 PteSrc.n.u1Dirty = 1;
2151 PteSrc.n.u1Accessed = 1;
2152 PteSrc.n.u1Write = 1;
2153 PteSrc.n.u1User = 1;
2154
2155 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2156
2157 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2158 GCPtrCurPage, PteSrc.n.u1Present,
2159 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2160 PteSrc.n.u1User & PdeSrc.n.u1User,
2161 (uint64_t)PteSrc.u,
2162 (uint64_t)pPTDst->a[iPTDst].u,
2163 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2164
2165 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2166 break;
2167 }
2168 else
2169 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2170 }
2171 }
2172 else
2173# endif /* PGM_SYNC_N_PAGES */
2174 {
2175 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2176 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2177 GSTPTE PteSrc;
2178
2179 /* Fake the page table entry */
2180 PteSrc.u = GCPtrCurPage;
2181 PteSrc.n.u1Present = 1;
2182 PteSrc.n.u1Dirty = 1;
2183 PteSrc.n.u1Accessed = 1;
2184 PteSrc.n.u1Write = 1;
2185 PteSrc.n.u1User = 1;
2186 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2187
2188 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2189 GCPtrPage, PteSrc.n.u1Present,
2190 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2191 PteSrc.n.u1User & PdeSrc.n.u1User,
2192 (uint64_t)PteSrc.u,
2193 (uint64_t)pPTDst->a[iPTDst].u,
2194 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2195 }
2196 return VINF_SUCCESS;
2197
2198#else
2199 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2200 return VERR_INTERNAL_ERROR;
2201#endif
2202}
2203
2204
2205#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2206
2207/**
2208 * CheckPageFault helper for returning a page fault indicating a non-present
2209 * (NP) entry in the page translation structures.
2210 *
2211 * @returns VINF_EM_RAW_GUEST_TRAP.
2212 * @param pVCpu The virtual CPU to operate on.
2213 * @param uErr The error code of the shadow fault. Corrections to
2214 * TRPM's copy will be made if necessary.
2215 * @param GCPtrPage For logging.
2216 * @param uPageFaultLevel For logging.
2217 */
2218DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2219{
2220 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2221 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2222 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2223 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2224 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2225
2226 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2227 return VINF_EM_RAW_GUEST_TRAP;
2228}
2229
2230
2231/**
2232 * CheckPageFault helper for returning a page fault indicating a reserved bit
2233 * (RSVD) error in the page translation structures.
2234 *
2235 * @returns VINF_EM_RAW_GUEST_TRAP.
2236 * @param pVCpu The virtual CPU to operate on.
2237 * @param uErr The error code of the shadow fault. Corrections to
2238 * TRPM's copy will be made if necessary.
2239 * @param GCPtrPage For logging.
2240 * @param uPageFaultLevel For logging.
2241 */
2242DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2243{
2244 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2245 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2246 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2247
2248 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2249 return VINF_EM_RAW_GUEST_TRAP;
2250}
2251
2252
2253/**
2254 * CheckPageFault helper for returning a page protection fault (P).
2255 *
2256 * @returns VINF_EM_RAW_GUEST_TRAP.
2257 * @param pVCpu The virtual CPU to operate on.
2258 * @param uErr The error code of the shadow fault. Corrections to
2259 * TRPM's copy will be made if necessary.
2260 * @param GCPtrPage For logging.
2261 * @param uPageFaultLevel For logging.
2262 */
2263DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2264{
2265 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyTrackRealPF));
2266 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2267 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2268 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2269
2270 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2271 return VINF_EM_RAW_GUEST_TRAP;
2272}
2273
2274
2275/**
2276 * Investigate a page fault to identify ones targeted at the guest and to
2277 * handle write protection page faults caused by dirty bit tracking.
2278 *
2279 * This will also detect invalid entries and raise X86_TRAP_PF_RSVD.
2280 *
2281 * @returns VBox status code.
2282 * @param pVCpu The VMCPU handle.
2283 * @param uErr Page fault error code. The X86_TRAP_PF_RSVD flag
2284 * cannot be trusted as it is used for MMIO optimizations.
2285 * @param pPdeSrc Guest page directory entry.
2286 * @param GCPtrPage Guest context page address.
2287 */
2288PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2289{
2290 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2291 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2292# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2293 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && GST_IS_NX_ACTIVE(pVCpu);
2294# endif
2295 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2296 PVM pVM = pVCpu->CTX_SUFF(pVM);
2297 int rc;
2298
2299 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2300
2301 /*
2302 * Note! For PAE it is safe to assume that bad guest physical addresses
2303 * (which returns all FFs) in the translation tables will cause
2304 * #PF(RSVD). The same will be the case for long mode provided the
2305 * physical address width is less than 52 bits - this we ASSUME.
2306 *
2307 * Note! No convenient shortcuts here, we have to validate everything!
2308 */
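 /* (Shape of the walk below: for AMD64 guests PML4E -> PDPE -> PDE and, unless
  *  it is a big page, the PTE as well. At each level a not-present entry yields
  *  CheckPageFaultReturnNP, reserved/invalid bits yield CheckPageFaultReturnRSVD,
  *  and a write/user/no-execute mismatch yields CheckPageFaultReturnProt. If the
  *  guest structures allow the access, the accessed (and on writes the dirty)
  *  bits are set and VINF_SUCCESS is returned.) */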
2309
2310# if PGM_GST_TYPE == PGM_TYPE_AMD64
2311 /*
2312 * Real page fault? (PML4E level)
2313 */
2314 PX86PML4 pPml4Src = pgmGstGetLongModePML4Ptr(pVCpu);
2315 if (RT_UNLIKELY(!pPml4Src))
2316 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2317
2318 PX86PML4E pPml4eSrc = &pPml4Src->a[(GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK];
2319 if (!pPml4eSrc->n.u1Present)
2320 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 0);
2321 if (RT_UNLIKELY(!GST_IS_PML4E_VALID(pVCpu, *pPml4eSrc)))
2322 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2323 if ( (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2324 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2325 || (fUserLevelFault && !pPml4eSrc->n.u1User) )
2326 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2327
2328 /*
2329 * Real page fault? (PDPE level)
2330 */
2331 PX86PDPT pPdptSrc;
2332 rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK, &pPdptSrc);
2333 if (RT_FAILURE(rc))
2334 {
2335 AssertMsgReturn(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc), rc);
2336 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2337 }
2338
2339 PX86PDPE pPdpeSrc = &pPdptSrc->a[(GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64];
2340 if (!pPdpeSrc->n.u1Present)
2341 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2342 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2343 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2344 if ( (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2345 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2346 || (fUserLevelFault && !pPdpeSrc->lm.u1User) )
2347 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 1);
2348
2349# elif PGM_GST_TYPE == PGM_TYPE_PAE
2350 /*
2351 * Real page fault? (PDPE level)
2352 */
2353 PX86PDPT pPdptSrc = pgmGstGetPaePDPTPtr(pVCpu);
2354 if (RT_UNLIKELY(!pPdptSrc))
2355 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2356/** @todo Handle bad CR3 address. */
2357 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(pVCpu, GCPtrPage);
2358 if (!pPdpeSrc->n.u1Present)
2359 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2360 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2361 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2362# endif /* PGM_GST_TYPE == PGM_TYPE_PAE */
2363
2364 /*
2365 * Real page fault? (PDE level)
2366 */
2367 if (!pPdeSrc->n.u1Present)
2368 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 2);
2369 bool const fBigPage = pPdeSrc->b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
2370 if (!fBigPage ? !GST_IS_PDE_VALID(pVCpu, *pPdeSrc) : !GST_IS_BIG_PDE_VALID(pVCpu, *pPdeSrc))
2371 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 2);
2372 if ( (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2373# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2374 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2375# endif
2376 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2377 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 2);
2378
2379 /*
2380 * First check the easy case where the page directory has been marked
2381 * read-only to track the dirty bit of an emulated BIG page.
2382 */
2383 if (fBigPage)
2384 {
2385 /* Mark guest page directory as accessed */
2386# if PGM_GST_TYPE == PGM_TYPE_AMD64
2387 pPml4eSrc->n.u1Accessed = 1;
2388 pPdpeSrc->lm.u1Accessed = 1;
2389# endif
2390 pPdeSrc->b.u1Accessed = 1;
2391
2392 /* Mark the guest PDE entry as dirty if it's a write access. */
2393 if (fWriteFault)
2394 pPdeSrc->b.u1Dirty = 1;
2395 }
2396 else
2397 {
2398 /*
2399 * Map the guest page table.
2400 */
2401 PGSTPT pPTSrc;
2402 PGSTPTE pPteSrc;
2403 GSTPTE PteSrc;
2404 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2405 if (RT_SUCCESS(rc))
2406 {
2407 pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2408 PteSrc.u = pPteSrc->u;
2409 }
2410 else if (rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS)
2411 {
2412 /* All bits in the PTE are set. */
2413# if PGM_GST_TYPE == PGM_TYPE_32BIT
2414 PteSrc.u = UINT32_MAX;
2415# else
2416 PteSrc.u = UINT64_MAX;
2417# endif
2418 pPteSrc = &PteSrc;
2419 }
2420 else
2421 {
2422 AssertRC(rc);
2423 return rc;
2424 }
2425
2426 /*
2427 * Real page fault?
2428 */
2429 if (!PteSrc.n.u1Present)
2430 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 3);
2431 if (!GST_IS_PTE_VALID(pVCpu, PteSrc))
2432 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 3);
2433 if ( (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2434# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2435 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2436# endif
2437 || (fUserLevelFault && !PteSrc.n.u1User) )
2438 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2439
2440 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2441
2442 /*
2443 * Set the accessed bits in the page directory and the page table.
2444 */
2445# if PGM_GST_TYPE == PGM_TYPE_AMD64
2446 pPml4eSrc->n.u1Accessed = 1;
2447 pPdpeSrc->lm.u1Accessed = 1;
2448# endif
2449 pPdeSrc->n.u1Accessed = 1;
2450 pPteSrc->n.u1Accessed = 1;
2451
2452 /*
2453 * Set the dirty flag in the PTE if it's a write access.
2454 */
2455 if (fWriteFault)
2456 {
2457# ifdef VBOX_WITH_STATISTICS
2458 if (!pPteSrc->n.u1Dirty)
2459 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtiedPage));
2460 else
2461 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageAlreadyDirty));
2462# endif
2463
2464 pPteSrc->n.u1Dirty = 1;
2465 }
2466 }
2467 return VINF_SUCCESS;
2468}
2469
2470
2471/**
2472 * Handle dirty bit tracking faults.
2473 *
2474 * @returns VBox status code.
2475 * @param pVCpu The VMCPU handle.
2476 * @param uErr Page fault error code.
2477 * @param pPdeSrc Guest page directory entry.
2478 * @param pPdeDst Shadow page directory entry.
2479 * @param GCPtrPage Guest context page address.
2480 */
2481PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage)
2482{
2483 PVM pVM = pVCpu->CTX_SUFF(pVM);
2484 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2485
2486 Assert(PGMIsLockOwner(pVM));
2487
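 /* (How we get here: SyncPage/SyncPT write-protect a not-yet-dirty but writable
  *  guest page and tag the shadow entry with PGM_PDFLAGS_TRACK_DIRTY or
  *  PGM_PTFLAGS_TRACK_DIRTY. When the guest then writes to it, the fault lands
  *  here; the tag is cleared, write access and the accessed/dirty bits are
  *  restored in the shadow entry, and VINF_PGM_HANDLED_DIRTY_BIT_FAULT restarts
  *  the instruction. Anything else is reported back as
  *  VINF_PGM_NO_DIRTY_BIT_TRACKING.) */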
2488 /*
2489 * Handle big page.
2490 */
2491 if (pPdeSrc->b.u1Size && GST_IS_PSE_ACTIVE(pVCpu))
2492 {
2493 if ( pPdeDst->n.u1Present
2494 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2495 {
2496 SHWPDE PdeDst = *pPdeDst;
2497
2498 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageTrap));
2499 Assert(pPdeSrc->b.u1Write);
2500
2501 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2502 * fault again and take this path to only invalidate the entry (see below).
2503 */
2504 PdeDst.n.u1Write = 1;
2505 PdeDst.n.u1Accessed = 1;
2506 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2507 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2508 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2509 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2510 }
2511
2512# ifdef IN_RING0
2513 /* Check for stale TLB entry; only applies to the SMP guest case. */
2514 if ( pVM->cCpus > 1
2515 && pPdeDst->n.u1Write
2516 && pPdeDst->n.u1Accessed)
2517 {
2518 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2519 if (pShwPage)
2520 {
2521 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2522 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2523 if ( pPteDst->n.u1Present
2524 && pPteDst->n.u1Write)
2525 {
2526 /* Stale TLB entry. */
2527 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageStale));
2528 PGM_INVL_PG(pVCpu, GCPtrPage);
2529 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2530 }
2531 }
2532 }
2533# endif /* IN_RING0 */
2534 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2535 }
2536
2537 /*
2538 * Map the guest page table.
2539 */
2540 PGSTPT pPTSrc;
2541 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2542 if (RT_FAILURE(rc))
2543 {
2544 AssertRC(rc);
2545 return rc;
2546 }
2547
2548 if (pPdeDst->n.u1Present)
2549 {
2550 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2551 const GSTPTE PteSrc = *pPteSrc;
2552
2553#ifndef IN_RING0
2554 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2555 * Our individual shadow handlers will provide more information and force a fatal exit.
2556 */
2557 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2558 {
2559 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2560 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2561 }
2562#endif
2563 /*
2564 * Map shadow page table.
2565 */
2566 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2567 if (pShwPage)
2568 {
2569 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2570 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2571 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2572 {
2573 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2574 {
2575 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2576 SHWPTE PteDst = *pPteDst;
2577
2578 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2579 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageTrap));
2580
2581 Assert(pPteSrc->n.u1Write);
2582
2583 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2584 * entry will not harm; write access will simply fault again and
2585 * take this path to only invalidate the entry.
2586 */
2587 if (RT_LIKELY(pPage))
2588 {
2589 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2590 {
2591 AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2592 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2593 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2594 PteDst.n.u1Write = 0;
2595 }
2596 else
2597 {
2598 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2599 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2600 {
2601 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2602 AssertRC(rc);
2603 }
2604 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2605 PteDst.n.u1Write = 1;
2606 else
2607 {
2608 /* Still applies to shared pages. */
2609 Assert(!PGM_PAGE_IS_ZERO(pPage));
2610 PteDst.n.u1Write = 0;
2611 }
2612 }
2613 }
2614 else
2615 PteDst.n.u1Write = 1; /** @todo r=bird: This doesn't make sense to me. */
2616
2617 PteDst.n.u1Dirty = 1;
2618 PteDst.n.u1Accessed = 1;
2619 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2620 ASMAtomicWriteSize(pPteDst, PteDst.u);
2621 PGM_INVL_PG(pVCpu, GCPtrPage);
2622 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2623 }
2624
2625# ifdef IN_RING0
2626 /* Check for stale TLB entry; only applies to the SMP guest case. */
2627 if ( pVM->cCpus > 1
2628 && pPteDst->n.u1Write == 1
2629 && pPteDst->n.u1Accessed == 1)
2630 {
2631 /* Stale TLB entry. */
2632 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageStale));
2633 PGM_INVL_PG(pVCpu, GCPtrPage);
2634 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2635 }
2636# endif
2637 }
2638 }
2639 else
2640 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2641 }
2642
2643 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2644}
2645
2646#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2647
2648
2649/**
2650 * Sync a shadow page table.
2651 *
2652 * The shadow page table is not present. This includes the case where
2653 * there is a conflict with a mapping.
2654 *
2655 * @returns VBox status code.
2656 * @param pVCpu The VMCPU handle.
2657 * @param iPDSrc Page directory index.
2658 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2659 * Assume this is a temporary mapping.
2660 * @param GCPtrPage GC pointer of the page that caused the fault.
2661 */
2662PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2663{
2664 PVM pVM = pVCpu->CTX_SUFF(pVM);
2665 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2666
2667 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2668#if 0 /* rarely useful; leave for debugging. */
2669 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2670#endif
2671 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2672
2673 Assert(PGMIsLocked(pVM));
2674
2675#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2676 || PGM_GST_TYPE == PGM_TYPE_PAE \
2677 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2678 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2679 && PGM_SHW_TYPE != PGM_TYPE_EPT
2680
2681 int rc = VINF_SUCCESS;
2682
2683 /*
2684 * Validate input a little bit.
2685 */
2686 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2687# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2688 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2689 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2690
2691 /* Fetch the pgm pool shadow descriptor. */
2692 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2693 Assert(pShwPde);
2694
2695# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2696 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2697 PPGMPOOLPAGE pShwPde = NULL;
2698 PX86PDPAE pPDDst;
2699 PSHWPDE pPdeDst;
2700
2701 /* Fetch the pgm pool shadow descriptor. */
2702 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2703 AssertRCSuccessReturn(rc, rc);
2704 Assert(pShwPde);
2705
2706 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2707 pPdeDst = &pPDDst->a[iPDDst];
2708
2709# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2710 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2711 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2712 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2713 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2714 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2715 AssertRCSuccessReturn(rc, rc);
2716 Assert(pPDDst);
2717 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2718# endif
2719 SHWPDE PdeDst = *pPdeDst;
2720
2721# if PGM_GST_TYPE == PGM_TYPE_AMD64
2722 /* Fetch the pgm pool shadow descriptor. */
2723 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2724 Assert(pShwPde);
2725# endif
2726
2727# ifndef PGM_WITHOUT_MAPPINGS
2728 /*
2729 * Check for conflicts.
2730 * RC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2731 * R3: Simply resolve the conflict.
2732 */
2733 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2734 {
2735 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2736# ifndef IN_RING3
2737 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2738 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2739 return VERR_ADDRESS_CONFLICT;
2740
2741# else /* IN_RING3 */
2742 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2743 Assert(pMapping);
2744# if PGM_GST_TYPE == PGM_TYPE_32BIT
2745 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2746# elif PGM_GST_TYPE == PGM_TYPE_PAE
2747 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2748# else
2749 AssertFailed(); /* can't happen for amd64 */
2750# endif
2751 if (RT_FAILURE(rc))
2752 {
2753 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
2754 return rc;
2755 }
2756 PdeDst = *pPdeDst;
2757# endif /* IN_RING3 */
2758 }
2759# endif /* !PGM_WITHOUT_MAPPINGS */
2760 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2761
2762# if defined(IN_RC)
2763 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2764 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2765# endif
2766
2767 /*
2768 * Sync page directory entry.
2769 */
2770 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2771 if (PdeSrc.n.u1Present)
2772 {
2773 /*
2774 * Allocate & map the page table.
2775 */
2776 PSHWPT pPTDst;
2777 const bool fPageTable = !PdeSrc.b.u1Size || !GST_IS_PSE_ACTIVE(pVCpu);
2778 PPGMPOOLPAGE pShwPage;
2779 RTGCPHYS GCPhys;
2780 if (fPageTable)
2781 {
2782 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2783# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2784 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2785 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2786# endif
2787 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2788 }
2789 else
2790 {
2791 PGMPOOLACCESS enmAccess;
2792# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2793 const bool fNoExecute = PdeSrc.n.u1NoExecute && GST_IS_NX_ACTIVE(pVCpu);
2794# else
2795 const bool fNoExecute = false;
2796# endif
2797
2798 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
2799# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2800 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2801 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2802# endif
2803 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
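 /* (The pool presumably caches shadow page tables keyed on GCPhys plus
  *  kind/access, and the same 2/4 MB guest physical range can be mapped
  *  elsewhere with different rights; folding user/supervisor, write and
  *  no-execute into enmAccess keeps a cached PT with mismatching rights
  *  from being reused.) */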
2804 if (PdeSrc.n.u1User)
2805 {
2806 if (PdeSrc.n.u1Write)
2807 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2808 else
2809 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2810 }
2811 else
2812 {
2813 if (PdeSrc.n.u1Write)
2814 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2815 else
2816 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2817 }
2818 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2819 }
2820 if (rc == VINF_SUCCESS)
2821 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2822 else if (rc == VINF_PGM_CACHED_PAGE)
2823 {
2824 /*
2825 * The PT was cached, just hook it up.
2826 */
2827 if (fPageTable)
2828 PdeDst.u = pShwPage->Core.Key
2829 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2830 else
2831 {
2832 PdeDst.u = pShwPage->Core.Key
2833 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2834 /* (see explanation and assumptions further down.) */
2835 if ( !PdeSrc.b.u1Dirty
2836 && PdeSrc.b.u1Write)
2837 {
2838 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2839 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2840 PdeDst.b.u1Write = 0;
2841 }
2842 }
2843 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2844# if defined(IN_RC)
2845 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2846# endif
2847 return VINF_SUCCESS;
2848 }
2849 else if (rc == VERR_PGM_POOL_FLUSHED)
2850 {
2851 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2852# if defined(IN_RC)
2853 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2854# endif
2855 return VINF_PGM_SYNC_CR3;
2856 }
2857 else
2858 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2859 PdeDst.u &= X86_PDE_AVL_MASK;
2860 PdeDst.u |= pShwPage->Core.Key;
2861
2862 /*
2863 * Page directory has been accessed (this is a fault situation, remember).
2864 */
2865 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2866 if (fPageTable)
2867 {
2868 /*
2869 * Page table - 4KB.
2870 *
2871 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2872 */
2873 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2874 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2875 PGSTPT pPTSrc;
2876 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2877 if (RT_SUCCESS(rc))
2878 {
2879 /*
2880 * Start by syncing the page directory entry so CSAM's TLB trick works.
2881 */
2882 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2883 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2884 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2885# if defined(IN_RC)
2886 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2887# endif
2888
2889 /*
2890 * Directory/page user or supervisor privilege: (same goes for read/write)
2891 *
2892 * Directory Page Combined
2893 * U/S U/S U/S
2894 * 0 0 0
2895 * 0 1 0
2896 * 1 0 0
2897 * 1 1 1
2898 *
2899 * Simple AND operation. Table listed for completeness.
2900 *
2901 */
2902 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT4K));
2903# ifdef PGM_SYNC_N_PAGES
2904 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2905 unsigned iPTDst = iPTBase;
2906 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2907 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2908 iPTDst = 0;
2909 else
2910 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2911# else /* !PGM_SYNC_N_PAGES */
2912 unsigned iPTDst = 0;
2913 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2914# endif /* !PGM_SYNC_N_PAGES */
2915# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2916 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2917 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2918# else
2919 const unsigned offPTSrc = 0;
2920# endif
2921 for (; iPTDst < iPTDstEnd; iPTDst++)
2922 {
2923 const unsigned iPTSrc = iPTDst + offPTSrc;
2924 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2925
2926 if (PteSrc.n.u1Present)
2927 {
2928# ifndef IN_RING0
2929 /*
 2930 * Assuming kernel code will be marked as supervisor (and not as user level
 2931 * and executed using a conforming code selector) and marked as read-only.
2932 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2933 */
2934 PPGMPAGE pPage;
2935 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2936 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2937 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2938 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2939 )
2940# endif
2941 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2942 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2943 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2944 PteSrc.n.u1Present,
2945 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2946 PteSrc.n.u1User & PdeSrc.n.u1User,
2947 (uint64_t)PteSrc.u,
2948 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2949 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2950 }
2951 /* else: the page table was cleared by the pool */
2952 } /* for PTEs */
2953 }
2954 }
2955 else
2956 {
2957 /*
2958 * Big page - 2/4MB.
2959 *
 2960 * We'll walk the RAM range list in parallel and optimize lookups.
 2961 * We will only sync one shadow page table at a time.
2962 */
2963 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT4M));
2964
 2965 /** @todo It might be more efficient to sync only a part of the 4MB page
 2966 * (similar to what we do for 4KB PDs). */
2968
2969 /*
2970 * Start by syncing the page directory entry.
2971 */
2972 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2973 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2974
2975 /*
2976 * If the page is not flagged as dirty and is writable, then make it read-only
2977 * at PD level, so we can set the dirty bit when the page is modified.
2978 *
2979 * ASSUMES that page access handlers are implemented on page table entry level.
2980 * Thus we will first catch the dirty access and set PDE.D and restart. If
2981 * there is an access handler, we'll trap again and let it work on the problem.
2982 */
2983 /** @todo move the above stuff to a section in the PGM documentation. */
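 /* In other words: the first guest write to the big page faults because the
  * shadow PDE is read-only; CheckDirtyPageFault() then sets the dirty bit in
  * the guest PDE, makes the shadow PDE writable again and the write is
  * restarted. */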
2984 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2985 if ( !PdeSrc.b.u1Dirty
2986 && PdeSrc.b.u1Write)
2987 {
2988 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,DirtyPageBig));
2989 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2990 PdeDst.b.u1Write = 0;
2991 }
2992 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2993# if defined(IN_RC)
2994 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2995# endif
2996
2997 /*
2998 * Fill the shadow page table.
2999 */
3000 /* Get address and flags from the source PDE. */
3001 SHWPTE PteDstBase;
3002 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
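 /* PteDstBase carries the access bits (P, R/W, U/S, A, D) taken from the big
  * guest PDE; the host physical address of each 4KB page is OR'ed in from the
  * RAM range walk below. */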
3003
3004 /* Loop thru the entries in the shadow PT. */
3005 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
3006 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
3007 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
3008 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3009 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
3010 unsigned iPTDst = 0;
3011 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3012 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
3013 {
3014 /* Advance ram range list. */
3015 while (pRam && GCPhys > pRam->GCPhysLast)
3016 pRam = pRam->CTX_SUFF(pNext);
3017 if (pRam && GCPhys >= pRam->GCPhys)
3018 {
3019 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
3020 do
3021 {
3022 /* Make shadow PTE. */
3023 PPGMPAGE pPage = &pRam->aPages[iHCPage];
3024 SHWPTE PteDst;
3025
3026# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3027 /* Try to make the page writable if necessary. */
3028 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
3029 && ( PGM_PAGE_IS_ZERO(pPage)
3030 || ( PteDstBase.n.u1Write
3031 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
3032# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
3033 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
3034# endif
3035# ifdef VBOX_WITH_PAGE_SHARING
3036 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
3037# endif
3038 && !PGM_PAGE_IS_BALLOONED(pPage))
3039 )
3040 )
3041 {
3042 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
3043 AssertRCReturn(rc, rc);
3044 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
3045 break;
3046 }
3047# endif
3048
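 /* Construct the shadow PTE: pages with active access handlers are mapped
  * read-only (or not at all for 'all' handlers), ballooned pages are skipped,
  * and anything not fully allocated is write-protected further down. */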
3049 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
3050 {
3051 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
3052 {
3053 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3054 PteDst.n.u1Write = 0;
3055 }
3056 else
3057 PteDst.u = 0;
3058 }
3059 else if (PGM_PAGE_IS_BALLOONED(pPage))
3060 {
3061 /* Skip ballooned pages. */
3062 PteDst.u = 0;
3063 }
3064# ifndef IN_RING0
3065 /*
 3066 * Assuming kernel code will be marked as supervisor and not as user level, and executed
 3067 * using a conforming code selector. Don't check for read-only, as that would imply the
 3068 * whole 4MB is code or read-only data. Linux enables write access for its large pages.
3069 */
3070 else if ( !PdeSrc.n.u1User
3071 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
3072 PteDst.u = 0;
3073# endif
3074 else
3075 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3076
3077 /* Only map writable pages writable. */
3078 if ( PteDst.n.u1Write
3079 && PteDst.n.u1Present
3080 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
3081 {
3082 /* Still applies to shared pages. */
3083 Assert(!PGM_PAGE_IS_ZERO(pPage));
3084 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
3085 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
3086 }
3087
3088 if (PteDst.n.u1Present)
3089 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
3090
3091 /* commit it */
3092 pPTDst->a[iPTDst] = PteDst;
3093 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
3094 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
3095 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3096
3097 /* advance */
3098 GCPhys += PAGE_SIZE;
3099 iHCPage++;
3100 iPTDst++;
3101 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3102 && GCPhys <= pRam->GCPhysLast);
3103 }
3104 else if (pRam)
3105 {
3106 Log(("Invalid pages at %RGp\n", GCPhys));
3107 do
3108 {
3109 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3110 GCPhys += PAGE_SIZE;
3111 iPTDst++;
3112 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3113 && GCPhys < pRam->GCPhys);
3114 }
3115 else
3116 {
3117 Log(("Invalid pages at %RGp (2)\n", GCPhys));
3118 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3119 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3120 }
3121 } /* while more PTEs */
3122 } /* 4KB / 4MB */
3123 }
3124 else
3125 AssertRelease(!PdeDst.n.u1Present);
3126
3127 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3128 if (RT_FAILURE(rc))
3129 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPTFailed));
3130 return rc;
3131
3132#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3133 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3134 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3135 && !defined(IN_RC)
3136
3137 /*
3138 * Validate input a little bit.
3139 */
3140 int rc = VINF_SUCCESS;
3141# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3142 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3143 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3144
3145 /* Fetch the pgm pool shadow descriptor. */
3146 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3147 Assert(pShwPde);
3148
3149# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3150 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3151 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3152 PX86PDPAE pPDDst;
3153 PSHWPDE pPdeDst;
3154
3155 /* Fetch the pgm pool shadow descriptor. */
3156 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
3157 AssertRCSuccessReturn(rc, rc);
3158 Assert(pShwPde);
3159
3160 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
3161 pPdeDst = &pPDDst->a[iPDDst];
3162
3163# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3164 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3165 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3166 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3167 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3168 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3169 AssertRCSuccessReturn(rc, rc);
3170 Assert(pPDDst);
3171 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3172
3173 /* Fetch the pgm pool shadow descriptor. */
3174 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3175 Assert(pShwPde);
3176
3177# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3178 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3179 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3180 PEPTPD pPDDst;
3181 PEPTPDPT pPdptDst;
3182
3183 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3184 if (rc != VINF_SUCCESS)
3185 {
3186 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3187 AssertRC(rc);
3188 return rc;
3189 }
3190 Assert(pPDDst);
3191 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3192
3193 /* Fetch the pgm pool shadow descriptor. */
3194 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3195 Assert(pShwPde);
3196# endif
3197 SHWPDE PdeDst = *pPdeDst;
3198
3199 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3200 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3201
3202# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3203 if (BTH_IS_NP_ACTIVE(pVM))
3204 {
3205 PPGMPAGE pPage;
3206
 3207 /* Check if we previously allocated a big page for this 2 MB range. */
3208 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3209 if (RT_SUCCESS(rc))
3210 {
3211 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3212
3213 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3214 {
3215 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3216 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3217 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3218 }
3219 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3220 {
3221 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3222 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3223 if (RT_SUCCESS(rc))
3224 {
3225 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3226 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3227 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3228 }
3229 }
3230 else if (PGMIsUsingLargePages(pVM))
3231 {
3232 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3233 if (RT_SUCCESS(rc))
3234 {
3235 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3236 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3237 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3238 }
3239 else
3240 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3241 }
3242
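 /* If one of the cases above produced a host physical address, map the whole
  * 2 MB range with a single large shadow PDE; otherwise fall through and
  * shadow it with an ordinary 4 KB page table. */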
3243 if (HCPhys != NIL_RTHCPHYS)
3244 {
3245 PdeDst.u &= X86_PDE_AVL_MASK;
3246 PdeDst.u |= HCPhys;
3247 PdeDst.n.u1Present = 1;
3248 PdeDst.n.u1Write = 1;
3249 PdeDst.b.u1Size = 1;
3250# if PGM_SHW_TYPE == PGM_TYPE_EPT
3251 PdeDst.n.u1Execute = 1;
3252 PdeDst.b.u1IgnorePAT = 1;
3253 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3254# else
3255 PdeDst.n.u1User = 1;
3256# endif
3257 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3258
3259 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3260 /* Add a reference to the first page only. */
3261 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3262
3263 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3264 return VINF_SUCCESS;
3265 }
3266 }
3267 }
 3268# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3269
3270 GSTPDE PdeSrc;
3271 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3272 PdeSrc.n.u1Present = 1;
3273 PdeSrc.n.u1Write = 1;
3274 PdeSrc.n.u1Accessed = 1;
3275 PdeSrc.n.u1User = 1;
3276
3277 /*
3278 * Allocate & map the page table.
3279 */
3280 PSHWPT pPTDst;
3281 PPGMPOOLPAGE pShwPage;
3282 RTGCPHYS GCPhys;
3283
3284 /* Virtual address = physical address */
3285 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
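 /* The pool page is keyed on the 2/4 MB-aligned physical base covered by this
  * shadow PDE (bits below SHW_PD_SHIFT cleared), so the pool can cache and
  * reuse the same PT for all faults in the range. */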
3286 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3287
3288 if ( rc == VINF_SUCCESS
3289 || rc == VINF_PGM_CACHED_PAGE)
3290 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3291 else
3292 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3293
3294 PdeDst.u &= X86_PDE_AVL_MASK;
3295 PdeDst.u |= pShwPage->Core.Key;
3296 PdeDst.n.u1Present = 1;
3297 PdeDst.n.u1Write = 1;
3298# if PGM_SHW_TYPE == PGM_TYPE_EPT
3299 PdeDst.n.u1Execute = 1;
3300# else
3301 PdeDst.n.u1User = 1;
3302 PdeDst.n.u1Accessed = 1;
3303# endif
3304 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3305
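 /* No guest paging here, so PdeSrc above is a faked, fully permissive PDE;
  * SyncPage will populate the PTEs around the faulting address. */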
3306 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3307 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3308 return rc;
3309
3310#else
3311 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3312 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncPT), a);
3313 return VERR_INTERNAL_ERROR;
3314#endif
3315}
3316
3317
3318
3319/**
3320 * Prefetch a page/set of pages.
3321 *
3322 * Typically used to sync commonly used pages before entering raw mode
3323 * after a CR3 reload.
3324 *
3325 * @returns VBox status code.
3326 * @param pVCpu The VMCPU handle.
 3327 * @param GCPtrPage Page to prefetch.
3328 */
3329PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3330{
3331#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3332 || PGM_GST_TYPE == PGM_TYPE_REAL \
3333 || PGM_GST_TYPE == PGM_TYPE_PROT \
3334 || PGM_GST_TYPE == PGM_TYPE_PAE \
3335 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3336 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3337 && PGM_SHW_TYPE != PGM_TYPE_EPT
3338
3339 /*
3340 * Check that all Guest levels thru the PDE are present, getting the
 3341 * PD and PDE in the process.
3342 */
3343 int rc = VINF_SUCCESS;
3344# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3345# if PGM_GST_TYPE == PGM_TYPE_32BIT
3346 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3347 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3348# elif PGM_GST_TYPE == PGM_TYPE_PAE
3349 unsigned iPDSrc;
3350 X86PDPE PdpeSrc;
3351 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3352 if (!pPDSrc)
3353 return VINF_SUCCESS; /* not present */
3354# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3355 unsigned iPDSrc;
3356 PX86PML4E pPml4eSrc;
3357 X86PDPE PdpeSrc;
3358 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3359 if (!pPDSrc)
3360 return VINF_SUCCESS; /* not present */
3361# endif
3362 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3363# else
3364 PGSTPD pPDSrc = NULL;
3365 const unsigned iPDSrc = 0;
3366 GSTPDE PdeSrc;
3367
3368 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3369 PdeSrc.n.u1Present = 1;
3370 PdeSrc.n.u1Write = 1;
3371 PdeSrc.n.u1Accessed = 1;
3372 PdeSrc.n.u1User = 1;
3373# endif
3374
3375 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3376 {
3377 PVM pVM = pVCpu->CTX_SUFF(pVM);
3378 pgmLock(pVM);
3379
3380# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3381 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3382# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3383 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3384 PX86PDPAE pPDDst;
3385 X86PDEPAE PdeDst;
3386# if PGM_GST_TYPE != PGM_TYPE_PAE
3387 X86PDPE PdpeSrc;
3388
3389 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3390 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3391# endif
3392 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3393 if (rc != VINF_SUCCESS)
3394 {
3395 pgmUnlock(pVM);
3396 AssertRC(rc);
3397 return rc;
3398 }
3399 Assert(pPDDst);
3400 PdeDst = pPDDst->a[iPDDst];
3401
3402# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3403 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3404 PX86PDPAE pPDDst;
3405 X86PDEPAE PdeDst;
3406
3407# if PGM_GST_TYPE == PGM_TYPE_PROT
3408 /* AMD-V nested paging */
3409 X86PML4E Pml4eSrc;
3410 X86PDPE PdpeSrc;
3411 PX86PML4E pPml4eSrc = &Pml4eSrc;
3412
3413 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3414 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3415 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3416# endif
3417
3418 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3419 if (rc != VINF_SUCCESS)
3420 {
3421 pgmUnlock(pVM);
3422 AssertRC(rc);
3423 return rc;
3424 }
3425 Assert(pPDDst);
3426 PdeDst = pPDDst->a[iPDDst];
3427# endif
3428 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3429 {
3430 if (!PdeDst.n.u1Present)
3431 {
3432 /** @todo r=bird: This guy will set the A bit on the PDE,
3433 * probably harmless. */
3434 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3435 }
3436 else
3437 {
3438 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3439 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3440 * makes no sense to prefetch more than one page.
3441 */
3442 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3443 if (RT_SUCCESS(rc))
3444 rc = VINF_SUCCESS;
3445 }
3446 }
3447 pgmUnlock(pVM);
3448 }
3449 return rc;
3450
3451#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3452 return VINF_SUCCESS; /* ignore */
3453#else
3454 AssertCompile(0);
3455#endif
3456}
3457
3458
3459
3460
3461/**
3462 * Syncs a page during a PGMVerifyAccess() call.
3463 *
3464 * @returns VBox status code (informational included).
3465 * @param pVCpu The VMCPU handle.
3466 * @param GCPtrPage The address of the page to sync.
3467 * @param fPage The effective guest page flags.
3468 * @param uErr The trap error code.
3469 * @remarks This will normally never be called on invalid guest page
3470 * translation entries.
3471 */
3472PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3473{
3474 PVM pVM = pVCpu->CTX_SUFF(pVM);
3475
3476 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3477
3478 Assert(!pVM->pgm.s.fNestedPaging);
3479#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3480 || PGM_GST_TYPE == PGM_TYPE_REAL \
3481 || PGM_GST_TYPE == PGM_TYPE_PROT \
3482 || PGM_GST_TYPE == PGM_TYPE_PAE \
3483 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3484 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3485 && PGM_SHW_TYPE != PGM_TYPE_EPT
3486
3487# ifndef IN_RING0
3488 if (!(fPage & X86_PTE_US))
3489 {
3490 /*
3491 * Mark this page as safe.
3492 */
3493 /** @todo not correct for pages that contain both code and data!! */
3494 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3495 CSAMMarkPage(pVM, GCPtrPage, true);
3496 }
3497# endif
3498
3499 /*
3500 * Get guest PD and index.
3501 */
3502 /** @todo Performance: We've done all this a jiffy ago in the
3503 * PGMGstGetPage call. */
3504# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3505# if PGM_GST_TYPE == PGM_TYPE_32BIT
3506 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3507 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3508
3509# elif PGM_GST_TYPE == PGM_TYPE_PAE
3510 unsigned iPDSrc = 0;
3511 X86PDPE PdpeSrc;
3512 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3513 if (RT_UNLIKELY(!pPDSrc))
3514 {
3515 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3516 return VINF_EM_RAW_GUEST_TRAP;
3517 }
3518
3519# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3520 unsigned iPDSrc = 0; /* shut up gcc */
3521 PX86PML4E pPml4eSrc = NULL; /* ditto */
3522 X86PDPE PdpeSrc;
3523 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3524 if (RT_UNLIKELY(!pPDSrc))
3525 {
3526 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3527 return VINF_EM_RAW_GUEST_TRAP;
3528 }
3529# endif
3530
3531# else /* !PGM_WITH_PAGING */
3532 PGSTPD pPDSrc = NULL;
3533 const unsigned iPDSrc = 0;
3534# endif /* !PGM_WITH_PAGING */
3535 int rc = VINF_SUCCESS;
3536
3537 pgmLock(pVM);
3538
3539 /*
 3540 * First check if the shadow PD is present.
3541 */
3542# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3543 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3544
3545# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3546 PX86PDEPAE pPdeDst;
3547 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3548 PX86PDPAE pPDDst;
3549# if PGM_GST_TYPE != PGM_TYPE_PAE
3550 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3551 X86PDPE PdpeSrc;
3552 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3553# endif
3554 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3555 if (rc != VINF_SUCCESS)
3556 {
3557 pgmUnlock(pVM);
3558 AssertRC(rc);
3559 return rc;
3560 }
3561 Assert(pPDDst);
3562 pPdeDst = &pPDDst->a[iPDDst];
3563
3564# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3565 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3566 PX86PDPAE pPDDst;
3567 PX86PDEPAE pPdeDst;
3568
3569# if PGM_GST_TYPE == PGM_TYPE_PROT
3570 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3571 X86PML4E Pml4eSrc;
3572 X86PDPE PdpeSrc;
3573 PX86PML4E pPml4eSrc = &Pml4eSrc;
3574 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3575 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3576# endif
3577
3578 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3579 if (rc != VINF_SUCCESS)
3580 {
3581 pgmUnlock(pVM);
3582 AssertRC(rc);
3583 return rc;
3584 }
3585 Assert(pPDDst);
3586 pPdeDst = &pPDDst->a[iPDDst];
3587# endif
3588
3589# if defined(IN_RC)
3590 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3591 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3592# endif
3593
3594 if (!pPdeDst->n.u1Present)
3595 {
3596 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3597 if (rc != VINF_SUCCESS)
3598 {
3599# if defined(IN_RC)
 3600 /* Release the dynamic pPdeDst mapping again. */
3601 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3602# endif
3603 pgmUnlock(pVM);
3604 AssertRC(rc);
3605 return rc;
3606 }
3607 }
3608
3609# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3610 /* Check for dirty bit fault */
3611 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3612 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3613 Log(("PGMVerifyAccess: success (dirty)\n"));
3614 else
3615# endif
3616 {
3617# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3618 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3619# else
3620 GSTPDE PdeSrc;
3621 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3622 PdeSrc.n.u1Present = 1;
3623 PdeSrc.n.u1Write = 1;
3624 PdeSrc.n.u1Accessed = 1;
3625 PdeSrc.n.u1User = 1;
3626# endif
3627
3628 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3629 if (uErr & X86_TRAP_PF_US)
3630 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncUser));
3631 else /* supervisor */
3632 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3633
3634 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3635 if (RT_SUCCESS(rc))
3636 {
3637 /* Page was successfully synced */
3638 Log2(("PGMVerifyAccess: success (sync)\n"));
3639 rc = VINF_SUCCESS;
3640 }
3641 else
3642 {
3643 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3644 rc = VINF_EM_RAW_GUEST_TRAP;
3645 }
3646 }
3647# if defined(IN_RC)
 3648 /* Release the dynamic pPdeDst mapping again. */
3649 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3650# endif
3651 pgmUnlock(pVM);
3652 return rc;
3653
3654#else /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3655
3656 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3657 return VERR_INTERNAL_ERROR;
3658#endif /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3659}
3660
3661
3662/**
3663 * Syncs the paging hierarchy starting at CR3.
3664 *
3665 * @returns VBox status code, no specials.
3666 * @param pVCpu The VMCPU handle.
3667 * @param cr0 Guest context CR0 register
3668 * @param cr3 Guest context CR3 register
3669 * @param cr4 Guest context CR4 register
3670 * @param fGlobal Including global page directories or not
3671 */
3672PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3673{
3674 PVM pVM = pVCpu->CTX_SUFF(pVM);
3675
3676 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3677
3678#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3679
3680 pgmLock(pVM);
3681
3682# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3683 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3684 if (pPool->cDirtyPages)
3685 pgmPoolResetDirtyPages(pVM);
3686# endif
3687
3688 /*
3689 * Update page access handlers.
 3690 * The virtual handlers are always flushed, while the physical handlers are only flushed on demand.
 3691 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
 3692 * have to look into that later because it will have a bad influence on performance.
3693 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3694 * bird: Yes, but that won't work for aliases.
3695 */
3696 /** @todo this MUST go away. See #1557. */
3697 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncCR3Handlers), h);
3698 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3699 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->CTX_MID_Z(Stat,SyncCR3Handlers), h);
3700 pgmUnlock(pVM);
3701#endif /* !NESTED && !EPT */
3702
3703#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3704 /*
3705 * Nested / EPT - almost no work.
3706 */
3707 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3708 return VINF_SUCCESS;
3709
3710#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3711 /*
3712 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3713 * out the shadow parts when the guest modifies its tables.
3714 */
3715 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3716 return VINF_SUCCESS;
3717
3718#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3719
3720# ifndef PGM_WITHOUT_MAPPINGS
3721 /*
3722 * Check for and resolve conflicts with our guest mappings if they
3723 * are enabled and not fixed.
3724 */
3725 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3726 {
3727 int rc = pgmMapResolveConflicts(pVM);
3728 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3729 if (rc == VINF_PGM_SYNC_CR3)
3730 {
3731 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3732 return VINF_PGM_SYNC_CR3;
3733 }
3734 }
3735# else
3736 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3737# endif
3738 return VINF_SUCCESS;
3739#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3740}
3741
3742
3743
3744
3745#ifdef VBOX_STRICT
3746#ifdef IN_RC
3747# undef AssertMsgFailed
3748# define AssertMsgFailed Log
3749#endif
3750#ifdef IN_RING3
3751# include <VBox/dbgf.h>
3752
3753/**
 3754 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3755 *
3756 * @returns VBox status code (VINF_SUCCESS).
3757 * @param cr3 The root of the hierarchy.
 3758 * @param cr4 The cr4; only PAE and PSE are currently used.
3759 * @param fLongMode Set if long mode, false if not long mode.
3760 * @param cMaxDepth Number of levels to dump.
3761 * @param pHlp Pointer to the output functions.
3762 */
3763RT_C_DECLS_BEGIN
3764VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3765RT_C_DECLS_END
3766
3767#endif
3768
3769/**
3770 * Checks that the shadow page table is in sync with the guest one.
3771 *
3772 * @returns The number of errors.
3774 * @param pVCpu The VMCPU handle.
3775 * @param cr3 Guest context CR3 register
3776 * @param cr4 Guest context CR4 register
3777 * @param GCPtr Where to start. Defaults to 0.
3778 * @param cb How much to check. Defaults to everything.
3779 */
3780PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3781{
3782#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3783 return 0;
3784#else
3785 unsigned cErrors = 0;
3786 PVM pVM = pVCpu->CTX_SUFF(pVM);
3787 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3788
3789#if PGM_GST_TYPE == PGM_TYPE_PAE
3790 /** @todo currently broken; crashes below somewhere */
3791 AssertFailed();
3792#endif
3793
3794#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3795 || PGM_GST_TYPE == PGM_TYPE_PAE \
3796 || PGM_GST_TYPE == PGM_TYPE_AMD64
3797
3798 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3799 PPGMCPU pPGM = &pVCpu->pgm.s;
3800 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3801 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3802# ifndef IN_RING0
3803 RTHCPHYS HCPhys; /* general usage. */
3804# endif
3805 int rc;
3806
3807 /*
3808 * Check that the Guest CR3 and all its mappings are correct.
3809 */
3810 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3811 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3812 false);
3813# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3814# if PGM_GST_TYPE == PGM_TYPE_32BIT
3815 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3816# else
3817 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3818# endif
3819 AssertRCReturn(rc, 1);
3820 HCPhys = NIL_RTHCPHYS;
3821 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3822 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3823# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3824 pgmGstGet32bitPDPtr(pVCpu);
3825 RTGCPHYS GCPhys;
3826 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3827 AssertRCReturn(rc, 1);
3828 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3829# endif
3830# endif /* !IN_RING0 */
3831
3832 /*
3833 * Get and check the Shadow CR3.
3834 */
3835# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3836 unsigned cPDEs = X86_PG_ENTRIES;
3837 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3838# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3839# if PGM_GST_TYPE == PGM_TYPE_32BIT
3840 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3841# else
3842 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3843# endif
3844 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3845# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3846 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3847 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3848# endif
3849 if (cb != ~(RTGCPTR)0)
3850 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3851
3852/** @todo call the other two PGMAssert*() functions. */
3853
3854# if PGM_GST_TYPE == PGM_TYPE_AMD64
3855 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3856
3857 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3858 {
3859 PPGMPOOLPAGE pShwPdpt = NULL;
3860 PX86PML4E pPml4eSrc;
3861 PX86PML4E pPml4eDst;
3862 RTGCPHYS GCPhysPdptSrc;
3863
3864 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3865 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3866
3867 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3868 if (!pPml4eDst->n.u1Present)
3869 {
3870 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3871 continue;
3872 }
3873
3874 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3875 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3876
3877 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3878 {
3879 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3880 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3881 cErrors++;
3882 continue;
3883 }
3884
3885 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3886 {
3887 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3888 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3889 cErrors++;
3890 continue;
3891 }
3892
3893 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3894 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3895 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3896 {
3897 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3898 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3899 cErrors++;
3900 continue;
3901 }
3902# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3903 {
3904# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3905
3906# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3907 /*
3908 * Check the PDPTEs too.
3909 */
3910 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3911
3912 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3913 {
3914 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3915 PPGMPOOLPAGE pShwPde = NULL;
3916 PX86PDPE pPdpeDst;
3917 RTGCPHYS GCPhysPdeSrc;
3918# if PGM_GST_TYPE == PGM_TYPE_PAE
3919 X86PDPE PdpeSrc;
3920 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3921 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3922# else
3923 PX86PML4E pPml4eSrcIgn;
3924 X86PDPE PdpeSrc;
3925 PX86PDPT pPdptDst;
3926 PX86PDPAE pPDDst;
3927 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3928
3929 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3930 if (rc != VINF_SUCCESS)
3931 {
3932 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3933 GCPtr += 512 * _2M;
3934 continue; /* next PDPTE */
3935 }
3936 Assert(pPDDst);
3937# endif
3938 Assert(iPDSrc == 0);
3939
3940 pPdpeDst = &pPdptDst->a[iPdpt];
3941
3942 if (!pPdpeDst->n.u1Present)
3943 {
3944 GCPtr += 512 * _2M;
3945 continue; /* next PDPTE */
3946 }
3947
3948 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3949 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3950
3951 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3952 {
3953 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3954 GCPtr += 512 * _2M;
3955 cErrors++;
3956 continue;
3957 }
3958
3959 if (GCPhysPdeSrc != pShwPde->GCPhys)
3960 {
3961# if PGM_GST_TYPE == PGM_TYPE_AMD64
3962 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3963# else
3964 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3965# endif
3966 GCPtr += 512 * _2M;
3967 cErrors++;
3968 continue;
3969 }
3970
3971# if PGM_GST_TYPE == PGM_TYPE_AMD64
3972 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3973 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3974 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3975 {
3976 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3977 GCPtr += 512 * _2M;
3978 cErrors++;
3979 continue;
3980 }
3981# endif
3982
3983# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3984 {
3985# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3986# if PGM_GST_TYPE == PGM_TYPE_32BIT
3987 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3988# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3989 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3990# endif
3991# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3992 /*
3993 * Iterate the shadow page directory.
3994 */
3995 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3996 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3997
3998 for (;
3999 iPDDst < cPDEs;
4000 iPDDst++, GCPtr += cIncrement)
4001 {
4002# if PGM_SHW_TYPE == PGM_TYPE_PAE
4003 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
4004# else
4005 const SHWPDE PdeDst = pPDDst->a[iPDDst];
4006# endif
4007 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
4008 {
4009 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
4010 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
4011 {
4012 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
4013 cErrors++;
4014 continue;
4015 }
4016 }
4017 else if ( (PdeDst.u & X86_PDE_P)
4018 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
4019 )
4020 {
4021 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4022 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
4023 if (!pPoolPage)
4024 {
4025 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4026 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4027 cErrors++;
4028 continue;
4029 }
4030 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
4031
4032 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4033 {
4034 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4035 GCPtr, (uint64_t)PdeDst.u));
4036 cErrors++;
4037 }
4038
4039 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4040 {
4041 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4042 GCPtr, (uint64_t)PdeDst.u));
4043 cErrors++;
4044 }
4045
4046 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4047 if (!PdeSrc.n.u1Present)
4048 {
4049 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4050 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4051 cErrors++;
4052 continue;
4053 }
4054
4055 if ( !PdeSrc.b.u1Size
4056 || !fBigPagesSupported)
4057 {
4058 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4059# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4060 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4061# endif
4062 }
4063 else
4064 {
4065# if PGM_GST_TYPE == PGM_TYPE_32BIT
4066 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4067 {
4068 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4069 GCPtr, (uint64_t)PdeSrc.u));
4070 cErrors++;
4071 continue;
4072 }
4073# endif
4074 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
4075# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4076 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4077# endif
4078 }
4079
4080 if ( pPoolPage->enmKind
4081 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4082 {
4083 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4084 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4085 cErrors++;
4086 }
4087
4088 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4089 if (!pPhysPage)
4090 {
4091 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4092 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4093 cErrors++;
4094 continue;
4095 }
4096
4097 if (GCPhysGst != pPoolPage->GCPhys)
4098 {
4099 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4100 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4101 cErrors++;
4102 continue;
4103 }
4104
4105 if ( !PdeSrc.b.u1Size
4106 || !fBigPagesSupported)
4107 {
4108 /*
4109 * Page Table.
4110 */
4111 const GSTPT *pPTSrc;
4112 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4113 if (RT_FAILURE(rc))
4114 {
4115 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4116 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4117 cErrors++;
4118 continue;
4119 }
4120 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4121 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4122 {
4123 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4124 // (This problem will go away when/if we shadow multiple CR3s.)
4125 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4126 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4127 cErrors++;
4128 continue;
4129 }
4130 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4131 {
4132 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4133 GCPtr, (uint64_t)PdeDst.u));
4134 cErrors++;
4135 continue;
4136 }
4137
4138 /* iterate the page table. */
4139# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4140 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4141 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4142# else
4143 const unsigned offPTSrc = 0;
4144# endif
4145 for (unsigned iPT = 0, off = 0;
4146 iPT < RT_ELEMENTS(pPTDst->a);
4147 iPT++, off += PAGE_SIZE)
4148 {
4149 const SHWPTE PteDst = pPTDst->a[iPT];
4150
4151 /* skip not-present entries. */
4152 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4153 continue;
4154 Assert(PteDst.n.u1Present);
4155
4156 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4157 if (!PteSrc.n.u1Present)
4158 {
4159# ifdef IN_RING3
4160 PGMAssertHandlerAndFlagsInSync(pVM);
4161 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4162# endif
4163 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4164 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4165 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4166 cErrors++;
4167 continue;
4168 }
4169
4170 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4171# if 1 /** @todo sync accessed bit properly... */
4172 fIgnoreFlags |= X86_PTE_A;
4173# endif
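 /* fIgnoreFlags collects the PTE bits that are allowed to differ between the
  * guest and shadow entries; the final comparison at the bottom of this loop
  * masks them out. */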
4174
4175 /* match the physical addresses */
4176 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4177 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4178
4179# ifdef IN_RING3
4180 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4181 if (RT_FAILURE(rc))
4182 {
4183 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4184 {
4185 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4186 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4187 cErrors++;
4188 continue;
4189 }
4190 }
4191 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4192 {
4193 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4194 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4195 cErrors++;
4196 continue;
4197 }
4198# endif
4199
4200 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4201 if (!pPhysPage)
4202 {
4203# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4204 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4205 {
4206 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4207 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4208 cErrors++;
4209 continue;
4210 }
4211# endif
4212 if (PteDst.n.u1Write)
4213 {
4214 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4215 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4216 cErrors++;
4217 }
4218 fIgnoreFlags |= X86_PTE_RW;
4219 }
4220 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4221 {
4222 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4223 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4224 cErrors++;
4225 continue;
4226 }
4227
4228 /* flags */
4229 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4230 {
4231 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4232 {
4233 if (PteDst.n.u1Write)
4234 {
4235 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4236 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4237 cErrors++;
4238 continue;
4239 }
4240 fIgnoreFlags |= X86_PTE_RW;
4241 }
4242 else
4243 {
4244 if ( PteDst.n.u1Present
4245# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4246 && !PGM_PAGE_IS_MMIO(pPhysPage)
4247# endif
4248 )
4249 {
4250 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4251 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4252 cErrors++;
4253 continue;
4254 }
4255 fIgnoreFlags |= X86_PTE_P;
4256 }
4257 }
4258 else
4259 {
4260 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4261 {
4262 if (PteDst.n.u1Write)
4263 {
4264 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4265 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4266 cErrors++;
4267 continue;
4268 }
4269 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4270 {
4271 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4272 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4273 cErrors++;
4274 continue;
4275 }
4276 if (PteDst.n.u1Dirty)
4277 {
4278 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4279 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4280 cErrors++;
4281 }
4282# if 0 /** @todo sync access bit properly... */
4283 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4284 {
 4285 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4286 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4287 cErrors++;
4288 }
4289 fIgnoreFlags |= X86_PTE_RW;
4290# else
4291 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4292# endif
4293 }
4294 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4295 {
4296 /* access bit emulation (not implemented). */
4297 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4298 {
4299 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4300 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4301 cErrors++;
4302 continue;
4303 }
4304 if (!PteDst.n.u1Accessed)
4305 {
 4306 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4307 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4308 cErrors++;
4309 }
4310 fIgnoreFlags |= X86_PTE_P;
4311 }
4312# ifdef DEBUG_sandervl
4313 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4314# endif
4315 }
4316
4317 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4318 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4319 )
4320 {
4321 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4322 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4323 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4324 cErrors++;
4325 continue;
4326 }
4327 } /* foreach PTE */
4328 }
4329 else
4330 {
4331 /*
4332 * Big Page.
4333 */
4334 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4335 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4336 {
4337 if (PdeDst.n.u1Write)
4338 {
4339 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4340 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4341 cErrors++;
4342 continue;
4343 }
4344 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4345 {
 4346 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4347 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4348 cErrors++;
4349 continue;
4350 }
4351# if 0 /** @todo sync access bit properly... */
4352 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4353 {
 4354 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4355 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4356 cErrors++;
4357 }
4358 fIgnoreFlags |= X86_PTE_RW;
4359# else
4360 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4361# endif
4362 }
4363 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4364 {
4365 /* access bit emulation (not implemented). */
4366 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4367 {
4368 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4369 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4370 cErrors++;
4371 continue;
4372 }
4373 if (!PdeDst.n.u1Accessed)
4374 {
 4375 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4376 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4377 cErrors++;
4378 }
4379 fIgnoreFlags |= X86_PTE_P;
4380 }
4381
4382 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4383 {
4384 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4385 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4386 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4387 cErrors++;
4388 }
4389
4390 /* iterate the page table. */
4391 for (unsigned iPT = 0, off = 0;
4392 iPT < RT_ELEMENTS(pPTDst->a);
4393 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4394 {
4395 const SHWPTE PteDst = pPTDst->a[iPT];
4396
4397 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4398 {
4399 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4400 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4401 cErrors++;
4402 }
4403
4404 /* skip not-present entries. */
4405 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4406 continue;
4407
4408 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4409
4410 /* match the physical addresses */
4411 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4412
4413# ifdef IN_RING3
4414 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4415 if (RT_FAILURE(rc))
4416 {
4417 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4418 {
4419 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4420 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4421 cErrors++;
4422 }
4423 }
4424 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4425 {
4426 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4427 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4428 cErrors++;
4429 continue;
4430 }
4431# endif
4432 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4433 if (!pPhysPage)
4434 {
4435# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4436 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4437 {
4438 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4439 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4440 cErrors++;
4441 continue;
4442 }
4443# endif
4444 if (PteDst.n.u1Write)
4445 {
4446 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4447 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4448 cErrors++;
4449 }
4450 fIgnoreFlags |= X86_PTE_RW;
4451 }
4452 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4453 {
4454 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4455 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4456 cErrors++;
4457 continue;
4458 }
4459
4460 /* flags */
4461 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4462 {
4463 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4464 {
4465 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4466 {
4467 if (PteDst.n.u1Write)
4468 {
4469 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4470 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4471 cErrors++;
4472 continue;
4473 }
4474 fIgnoreFlags |= X86_PTE_RW;
4475 }
4476 }
4477 else
4478 {
4479 if ( PteDst.n.u1Present
4480# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4481 && !PGM_PAGE_IS_MMIO(pPhysPage)
4482# endif
4483 )
4484 {
4485 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4486 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4487 cErrors++;
4488 continue;
4489 }
4490 fIgnoreFlags |= X86_PTE_P;
4491 }
4492 }
4493
4494 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4495 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4496 )
4497 {
4498 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4499 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4500 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4501 cErrors++;
4502 continue;
4503 }
4504 } /* for each PTE */
4505 }
4506 }
4507 /* not present */
4508
4509 } /* for each PDE */
4510
4511 } /* for each PDPTE */
4512
4513 } /* for each PML4E */
4514
4515# ifdef DEBUG
4516 if (cErrors)
4517 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4518# endif
4519
4520#endif /* GST == 32BIT, PAE or AMD64 */
4521 return cErrors;
4522
4523#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4524}
4525#endif /* VBOX_STRICT */
4526
4527
4528/**
4529 * Sets up the CR3 for shadow paging
4530 *
4531 * @returns Strict VBox status code.
4532 * @retval VINF_SUCCESS.
4533 *
4534 * @param pVCpu The VMCPU handle.
4535 * @param GCPhysCR3 The physical address in the CR3 register.
4536 */
4537PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4538{
4539 PVM pVM = pVCpu->CTX_SUFF(pVM);
4540
4541 /* Update guest paging info. */
4542#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4543 || PGM_GST_TYPE == PGM_TYPE_PAE \
4544 || PGM_GST_TYPE == PGM_TYPE_AMD64
4545
4546 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4547
4548 /*
4549 * Map the page CR3 points at.
4550 */
4551 RTHCPTR HCPtrGuestCR3;
4552 RTHCPHYS HCPhysGuestCR3;
4553 pgmLock(pVM);
4554 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4555 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4556 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4557 /** @todo this needs some reworking wrt. locking. */
4558# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4559 HCPtrGuestCR3 = NIL_RTHCPTR;
4560 int rc = VINF_SUCCESS;
4561# else
4562 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4563# endif
4564 pgmUnlock(pVM);
4565 if (RT_SUCCESS(rc))
4566 {
4567 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4568 if (RT_SUCCESS(rc))
4569 {
4570# ifdef IN_RC
4571 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4572# endif
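 /* Cache the guest root table pointer for each context (R3/R0/RC) according to the
    guest paging mode. */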
4573# if PGM_GST_TYPE == PGM_TYPE_32BIT
4574 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4575# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4576 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4577# endif
4578 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4579
4580# elif PGM_GST_TYPE == PGM_TYPE_PAE
4581 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4582 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4583# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4584 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4585# endif
4586 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4587 LogFlow(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4588
4589 /*
4590 * Map the 4 PDs too.
4591 */
4592 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
4593 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
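 /* The four PDs are mapped into the hypervisor pages immediately following the PDPT
    mapping, one page each. */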
4594 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4595 {
4596 if (pGuestPDPT->a[i].n.u1Present)
4597 {
4598 RTHCPTR HCPtr;
4599 RTHCPHYS HCPhys;
4600 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
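 /* E.g. a PDPTE value of 0x00000000abcd1027 would yield GCPhys 0x00000000abcd1000 once
    the flag bits are masked off (illustrative value). */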
4601 pgmLock(pVM);
4602 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4603 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4604 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4605# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4606 HCPtr = NIL_RTHCPTR;
4607 int rc2 = VINF_SUCCESS;
4608# else
4609 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4610# endif
4611 pgmUnlock(pVM);
4612 if (RT_SUCCESS(rc2))
4613 {
4614 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4615 AssertRCReturn(rc, rc);
4616
4617 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4618# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4619 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4620# endif
4621 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4622 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4623# ifdef IN_RC
4624 PGM_INVL_PG(pVCpu, GCPtr);
4625# endif
4626 continue;
4627 }
4628 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4629 }
4630
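 /* Not present, or the mapping attempt failed: invalidate the cached PD entry. */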
4631 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4632# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4633 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4634# endif
4635 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4636 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4637# ifdef IN_RC
4638 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4639# endif
4640 }
4641
4642# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4643 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4644# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4645 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4646# endif
4647# endif
4648 }
4649 else
4650 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4651 }
4652 else
4653 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4654
4655#else /* prot/real stub */
4656 int rc = VINF_SUCCESS;
4657#endif
4658
4659 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4660# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4661 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4662 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4663 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4664 && PGM_GST_TYPE != PGM_TYPE_PROT))
4665
4666 Assert(!pVM->pgm.s.fNestedPaging);
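 /* With nested paging the shadow root is managed elsewhere and this path is never taken,
    as the assertion above documents. */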
4667
4668 /*
4669 * Update the shadow root page as well since that's not fixed.
4670 */
4671 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4672 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4673 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4674 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4675 PPGMPOOLPAGE pNewShwPageCR3;
4676
4677 pgmLock(pVM);
4678
4679# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4680 if (pPool->cDirtyPages)
4681 pgmPoolResetDirtyPages(pVM);
4682# endif
4683
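 /* Allocate (or look up) the shadow root page for this guest CR3 in the pool and keep it
    locked so it cannot be flushed while it is the active root. */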
4684 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4685 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4686 AssertFatalRC(rc);
4687 rc = VINF_SUCCESS;
4688
4689# ifdef IN_RC
4690 /*
4691 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4692 * state will be inconsistent! Flush important things now while
4693 * we still can and then make sure there are no ring-3 calls.
4694 */
4695 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4696 VMMRZCallRing3Disable(pVCpu);
4697# endif
4698
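 /* Switch to the new shadow root: record the pool user/table indexes and update the
    per-context pointers to the shadow CR3 page. */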
4699 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4700 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4701 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4702# ifdef IN_RING0
4703 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4704 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4705# elif defined(IN_RC)
4706 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4707 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4708# else
4709 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4710 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4711# endif
4712
4713# ifndef PGM_WITHOUT_MAPPINGS
4714 /*
4715 * Apply all hypervisor mappings to the new CR3.
4716 * Note that SyncCR3 will be executed whenever CR3 is changed in a paged guest mode; this
4717 * makes sure we check for conflicts in the new CR3 root.
4718 */
4719# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4720 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4721# endif
4722 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4723 AssertRCReturn(rc, rc);
4724# endif
4725
4726 /* Set the current hypervisor CR3. */
4727 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4728 SELMShadowCR3Changed(pVM, pVCpu);
4729
4730# ifdef IN_RC
4731 /* NOTE: The state is consistent again. */
4732 VMMRZCallRing3Enable(pVCpu);
4733# endif
4734
4735 /* Clean up the old CR3 root. */
4736 if ( pOldShwPageCR3
4737 && pOldShwPageCR3 != pNewShwPageCR3 /** @todo can happen due to incorrect syncing between REM & PGM; find the real cause. */)
4738 {
4739 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4740# ifndef PGM_WITHOUT_MAPPINGS
4741 /* Remove the hypervisor mappings from the shadow page table. */
4742 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4743# endif
4744 /* Mark the page as unlocked; allow flushing again. */
4745 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4746
4747 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4748 }
4749 pgmUnlock(pVM);
4750# endif
4751
4752 return rc;
4753}
4754
4755/**
4756 * Unmaps the shadow CR3 and invalidates the cached guest paging pointers.
4757 *
4758 * @returns VBox status code, no specials.
4759 * @param pVCpu The VMCPU handle.
4760 */
4761PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4762{
4763 LogFlow(("UnmapCR3\n"));
4764
4765 int rc = VINF_SUCCESS;
4766 PVM pVM = pVCpu->CTX_SUFF(pVM);
4767
4768 /*
4769 * Update guest paging info.
4770 */
4771#if PGM_GST_TYPE == PGM_TYPE_32BIT
4772 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4773# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4774 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4775# endif
4776 pVCpu->pgm.s.pGst32BitPdRC = 0;
4777
4778#elif PGM_GST_TYPE == PGM_TYPE_PAE
4779 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4780# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4781 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4782# endif
4783 pVCpu->pgm.s.pGstPaePdptRC = 0;
4784 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4785 {
4786 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4787# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4788 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4789# endif
4790 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4791 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4792 }
4793
4794#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4795 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4796# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4797 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4798# endif
4799
4800#else /* prot/real mode stub */
4801 /* nothing to do */
4802#endif
4803
4804#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4805 /*
4806 * Update shadow paging info.
4807 */
4808# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4809 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4810 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4811
4812# if PGM_GST_TYPE != PGM_TYPE_REAL
4813 Assert(!pVM->pgm.s.fNestedPaging);
4814# endif
4815
4816 pgmLock(pVM);
4817
4818# ifndef PGM_WITHOUT_MAPPINGS
4819 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4820 /* Remove the hypervisor mappings from the shadow page table. */
4821 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4822# endif
4823
4824 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4825 {
4826 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4827
4828 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4829
4830# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4831 if (pPool->cDirtyPages)
4832 pgmPoolResetDirtyPages(pVM);
4833# endif
4834
4835 /* Mark the page as unlocked; allow flushing again. */
4836 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4837
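 /* Return the shadow root page to the pool and clear all cached references to it. */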
4838 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4839 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4840 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4841 pVCpu->pgm.s.pShwPageCR3RC = 0;
4842 pVCpu->pgm.s.iShwUser = 0;
4843 pVCpu->pgm.s.iShwUserTable = 0;
4844 }
4845 pgmUnlock(pVM);
4846# endif
4847#endif /* !IN_RC */
4848
4849 return rc;
4850}