VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 94200

Last change on this file since 94200 was 94012, checked in by vboxsync, 3 years ago

VMM: Nested VMX: bugref:10092 Remove unused functions CheckPageFaultReturnNP, CheckPageFaultReturnRSVD and CheckPageFaultReturnProt.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 197.9 KB
1/* $Id: PGMAllBth.h 94012 2022-03-01 08:52:20Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
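/*
 * How this template is consumed (simplified sketch; the real instantiation
 * list lives in PGMAll.cpp): the header is included once per shadow+guest
 * paging combination with the mode macros predefined, along the lines of
 *
 *     #define PGM_SHW_TYPE        PGM_TYPE_32BIT
 *     #define PGM_GST_TYPE        PGM_TYPE_REAL
 *     #define PGM_BTH_NAME(name)  PGM_BTH_NAME_32BIT_REAL(name)
 *     #include "PGMAllBth.h"
 *     #undef  PGM_GST_TYPE
 *     #undef  PGM_BTH_NAME
 *
 * so each inclusion emits a mode-specific copy of every function below,
 * e.g. PGM_BTH_NAME(Trap0eHandler) expands to a symbol unique to the
 * 32-bit-shadow / real-mode-guest pair.
 */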
12
13/*
14 * Copyright (C) 2006-2022 Oracle Corporation
15 *
16 * This file is part of VirtualBox Open Source Edition (OSE), as
17 * available from http://www.virtualbox.org. This file is free software;
18 * you can redistribute it and/or modify it under the terms of the GNU
19 * General Public License (GPL) as published by the Free Software
20 * Foundation, in version 2 as it comes in the "COPYING" file of the
21 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
22 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
23 */
24
25#ifdef _MSC_VER
26/** @todo we're generating unnecessary code in nested/ept shadow mode and for
27 * real/prot-guest+RC mode. */
28# pragma warning(disable: 4505)
29#endif
30
31
32/*********************************************************************************************************************************
33* Internal Functions *
34*********************************************************************************************************************************/
35RT_C_DECLS_BEGIN
36PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
37#ifndef IN_RING3
38PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
39PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysNested,
40 bool fIsLinearAddrValid, RTGCPTR GCPtrNested, PPGMPTWALK pWalk, bool *pfLockTaken);
41#endif
42PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
43static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
44static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
45static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
46#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
47static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
48#else
49static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
50#endif
51PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
52PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
53PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
54#ifdef VBOX_STRICT
55PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
56#endif
57PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
58PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
59
60#ifdef IN_RING3
61PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
62#endif
63RT_C_DECLS_END
64
65
66
67
68/*
69 * Filter out some illegal combinations of guest and shadow paging, so we can
70 * remove redundant checks inside functions.
71 */
72#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
73 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
74# error "Invalid combination; PAE guest implies PAE shadow"
75#endif
76
77#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
78 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
79 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
80# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
81#endif
82
83#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
84 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
85 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
86# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
87#endif
88
89#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
90 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
91# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
92#endif
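/*
 * Net effect of the checks above (a sketch derived from the #error conditions;
 * a nested, EPT or NONE shadow type is accepted with any guest mode):
 *
 *   REAL guest   ->  32BIT or PAE shadow
 *   PROT guest   ->  32BIT, PAE or AMD64 shadow
 *   32BIT guest  ->  32BIT or PAE shadow
 *   PAE guest    ->  PAE shadow
 *   AMD64 guest  ->  AMD64 shadow
 */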
93
94
95/**
96 * Enters the shadow+guest mode.
97 *
98 * @returns VBox status code.
99 * @param pVCpu The cross context virtual CPU structure.
100 * @param GCPhysCR3 The physical address from the CR3 register.
101 */
102PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
103{
104 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches;
105 * other modes rely on MapCR3/UnmapCR3 to set up the shadow root page tables.
106 */
107#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
108 || PGM_SHW_TYPE == PGM_TYPE_PAE \
109 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
110 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
111 || PGM_GST_TYPE == PGM_TYPE_PROT))
112
113 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
114
115 Assert(HMIsNestedPagingActive(pVM));
116 Assert(!pVM->pgm.s.fNestedPaging);
117
118 PGM_LOCK_VOID(pVM);
119 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
120 * but any calls to GC need a proper shadow page setup as well.
121 */
122 /* Free the previous root mapping if still active. */
123 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
124 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
125 if (pOldShwPageCR3)
126 {
127 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
128
129 /* Mark the page as unlocked; allow flushing again. */
130 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
131
132 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
133 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
134 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
135 }
136
137 /* construct a fake address. */
138 GCPhysCR3 = RT_BIT_64(63);
139 PPGMPOOLPAGE pNewShwPageCR3;
140 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
141 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
142 &pNewShwPageCR3);
143 AssertRCReturn(rc, rc);
144
145 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
146 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
147
148 /* Mark the page as locked; disallow flushing. */
149 pgmPoolLockPage(pPool, pNewShwPageCR3);
150
151 /* Set the current hypervisor CR3. */
152 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
153
154 PGM_UNLOCK(pVM);
155 return rc;
156#else
157 NOREF(pVCpu); NOREF(GCPhysCR3);
158 return VINF_SUCCESS;
159#endif
160}
161
162
163#ifndef IN_RING3
164
165# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
166/**
167 * Deal with a guest page fault.
168 *
169 * @returns Strict VBox status code.
170 * @retval VINF_EM_RAW_GUEST_TRAP
171 * @retval VINF_EM_RAW_EMULATE_INSTR
172 *
173 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
174 * @param pWalk The guest page table walk result.
175 * @param uErr The error code.
176 */
177PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PPGMPTWALK pWalk, RTGCUINT uErr)
178{
179 /*
180 * Calc the error code for the guest trap.
181 */
182 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
183 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
184 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
185 if ( pWalk->fRsvdError
186 || pWalk->fBadPhysAddr)
187 {
188 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
189 Assert(!pWalk->fNotPresent);
190 }
191 else if (!pWalk->fNotPresent)
192 uNewErr |= X86_TRAP_PF_P;
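    /* Worked example (illustrative): a user-mode write that misses in the guest
       page tables arrives with uErr = RW|US and pWalk->fNotPresent set, so
       neither branch above fires and the guest sees RW|US with P clear; the
       same write hitting a present read-only mapping leaves fNotPresent clear
       and gets RW|US|P reflected back instead. */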
193 TRPMSetErrorCode(pVCpu, uNewErr);
194
195 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pWalk->GCPtr, uErr, pWalk->uLevel));
196 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
197 return VINF_EM_RAW_GUEST_TRAP;
198}
199# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
200
201
202#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
203/**
204 * Deal with a guest page fault.
205 *
206 * The caller has taken the PGM lock.
207 *
208 * @returns Strict VBox status code.
209 *
210 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
211 * @param uErr The error code.
212 * @param pRegFrame The register frame.
213 * @param pvFault The fault address.
214 * @param pPage The guest page at @a pvFault.
215 * @param pWalk The guest page table walk result.
216 * @param pGstWalk The guest paging-mode specific walk information.
217 * @param pfLockTaken PGM lock taken here or not (out). This is true
218 * when we're called.
219 */
220static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
221 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
222# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
223 , PPGMPTWALK pWalk
224 , PGSTPTWALK pGstWalk
225# endif
226 )
227{
228# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
229 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
230# endif
231 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
232 VBOXSTRICTRC rcStrict;
233
234 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
235 {
236 /*
237 * Physical page access handler.
238 */
239# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
240 const RTGCPHYS GCPhysFault = pWalk->GCPhys;
241# else
242 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
243# endif
244 PPGMPHYSHANDLER pCur;
245 rcStrict = pgmHandlerPhysicalLookup(pVM, GCPhysFault, &pCur);
246 if (RT_SUCCESS(rcStrict))
247 {
248 PCPGMPHYSHANDLERTYPEINT const pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
249
250# ifdef PGM_SYNC_N_PAGES
251 /*
252 * If the region is write protected and we got a page not present fault, then sync
253 * the pages. If the fault was caused by a read, then restart the instruction.
254 * In case of write access continue to the GC write handler.
255 *
256 * ASSUMES that there is only one handler per page or that they have similar write properties.
257 */
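            /* Concrete illustration: the page pool registers write handlers on
               guest page tables it shadows. A guest read of such a page that
               faults with P=0 is resolved right here by syncing the page and
               restarting the instruction; only a genuine write continues on to
               the physical handler further below. */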
258 if ( !(uErr & X86_TRAP_PF_P)
259 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
260 {
261# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
262 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
263# else
264 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
265# endif
266 if ( RT_FAILURE(rcStrict)
267 || !(uErr & X86_TRAP_PF_RW)
268 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
269 {
270 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
271 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
272 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
273 return rcStrict;
274 }
275 }
276# endif
277# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
278 /*
279 * If the access was not thru a #PF(RSVD|...) resync the page.
280 */
281 if ( !(uErr & X86_TRAP_PF_RSVD)
282 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
283# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
284 && (pWalk->fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK))
285 == PGM_PTATTRS_W_MASK /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
286# endif
287 )
288 {
289# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
290 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
291# else
292 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
293# endif
294 if ( RT_FAILURE(rcStrict)
295 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
296 {
297 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
298 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
299 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
300 return rcStrict;
301 }
302 }
303# endif
304
305 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
306 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
307 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
308 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
309 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
310 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
311 else
312 {
313 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
314 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
315 }
316
317 if (pCurType->pfnPfHandler)
318 {
319 STAM_PROFILE_START(&pCur->Stat, h);
320
321 if (pCurType->fKeepPgmLock)
322 {
323 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault,
324 !pCurType->fRing0DevInsIdx ? pCur->uUser
325 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser));
326
327 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
328 }
329 else
330 {
331 uint64_t const uUser = !pCurType->fRing0DevInsIdx ? pCur->uUser
332 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser);
333 PGM_UNLOCK(pVM);
334 *pfLockTaken = false;
335
336 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault, uUser);
337
338 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
339 }
340 }
341 else
342 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
343
344 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
345 return rcStrict;
346 }
347 AssertMsgReturn(rcStrict == VERR_NOT_FOUND, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), rcStrict);
348 }
349
350 /*
351 * There is a handled area of the page, but this fault doesn't belong to it.
352 * We must emulate the instruction.
353 *
354 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
355 * we first check whether this was a page-not-present fault for a page with only
356 * write access handlers. Restart the instruction if it wasn't a write access.
357 */
358 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
359
360 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
361 && !(uErr & X86_TRAP_PF_P))
362 {
363# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
364 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
365# else
366 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
367# endif
368 if ( RT_FAILURE(rcStrict)
369 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
370 || !(uErr & X86_TRAP_PF_RW))
371 {
372 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
373 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
374 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
375 return rcStrict;
376 }
377 }
378
379 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
380 * It's writing to an unhandled part of the LDT page several million times.
381 */
382 rcStrict = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
383 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
384 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
385 return rcStrict;
386} /* if any kind of handler */
387# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
388
389
390/**
391 * \#PF Handler for raw-mode guest execution.
392 *
393 * @returns VBox status code (appropriate for trap handling and GC return).
394 *
395 * @param pVCpu The cross context virtual CPU structure.
396 * @param uErr The trap error code.
397 * @param pRegFrame Trap register frame.
398 * @param pvFault The fault address.
399 * @param pfLockTaken PGM lock taken here or not (out)
400 */
401PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
402{
403 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
404
405 *pfLockTaken = false;
406
407# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
408 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
409 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
410 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
411 && PGM_SHW_TYPE != PGM_TYPE_NONE
412 int rc;
413
414# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
415 /*
416 * Walk the guest page translation tables and check if it's a guest fault.
417 */
418 PGMPTWALK Walk;
419 GSTPTWALK GstWalk;
420 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &Walk, &GstWalk);
421 if (RT_FAILURE_NP(rc))
422 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
423
424 /* assert some GstWalk sanity. */
425# if PGM_GST_TYPE == PGM_TYPE_AMD64
426 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
427# endif
428# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
429 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
430# endif
431 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
432 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
433 Assert(Walk.fSucceeded);
434 Assert(Walk.fEffective & PGM_PTATTRS_R_MASK);
435
436 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
437 {
438 if ( ( (uErr & X86_TRAP_PF_RW)
439 && !(Walk.fEffective & PGM_PTATTRS_W_MASK)
440 && ( (uErr & X86_TRAP_PF_US)
441 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
442 || ((uErr & X86_TRAP_PF_US) && !(Walk.fEffective & PGM_PTATTRS_US_MASK))
443 || ((uErr & X86_TRAP_PF_ID) && (Walk.fEffective & PGM_PTATTRS_NX_MASK))
444 )
445 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
446 }
447
448 /* Take the big lock now before we update flags. */
449 *pfLockTaken = true;
450 PGM_LOCK_VOID(pVM);
451
452 /*
453 * Set the accessed and dirty flags.
454 */
455 /** @todo Should probably use cmpxchg logic here as we're potentially racing
456 * other CPUs in SMP configs. (the lock isn't enough, since we take it
457 * after walking and the page tables could be stale already) */
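    /* Illustrative outcome of the updates below: a write fault on a present
       4KB mapping sets the accessed bit on every level walked (PML4E/PDPE/PDE
       for an AMD64 guest) plus A and D on the PTE, while a read fault only
       sets accessed bits; for a big (2/4MB) page the A bit, and D on writes,
       land on the PDE itself. */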
458# if PGM_GST_TYPE == PGM_TYPE_AMD64
459 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
460 {
461 GstWalk.Pml4e.u |= X86_PML4E_A;
462 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
463 }
464 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
465 {
466 GstWalk.Pdpe.u |= X86_PDPE_A;
467 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
468 }
469# endif
470 if (Walk.fBigPage)
471 {
472 Assert(GstWalk.Pde.u & X86_PDE_PS);
473 if (uErr & X86_TRAP_PF_RW)
474 {
475 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
476 {
477 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
478 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
479 }
480 }
481 else
482 {
483 if (!(GstWalk.Pde.u & X86_PDE4M_A))
484 {
485 GstWalk.Pde.u |= X86_PDE4M_A;
486 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
487 }
488 }
489 }
490 else
491 {
492 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
493 if (!(GstWalk.Pde.u & X86_PDE_A))
494 {
495 GstWalk.Pde.u |= X86_PDE_A;
496 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
497 }
498
499 if (uErr & X86_TRAP_PF_RW)
500 {
501# ifdef VBOX_WITH_STATISTICS
502 if (GstWalk.Pte.u & X86_PTE_D)
503 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
504 else
505 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
506# endif
507 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
508 {
509 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
510 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
511 }
512 }
513 else
514 {
515 if (!(GstWalk.Pte.u & X86_PTE_A))
516 {
517 GstWalk.Pte.u |= X86_PTE_A;
518 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
519 }
520 }
521 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
522 }
523#if 0
524 /* Disabling this since it's not reliable for SMP, see @bugref{10092#c22}. */
525 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
526 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
527#endif
528
529# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
530 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
531
532 /* Take the big lock now. */
533 *pfLockTaken = true;
534 PGM_LOCK_VOID(pVM);
535# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
536
537# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
538 /*
539 * If it is a reserved bit fault we know that it is an MMIO (access
540 * handler) related fault and can skip some 200 lines of code.
541 */
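    /* Background sketch: with the MMIO optimization active, shadow PTEs for
       pages covered by all-access handlers are written with a reserved-bit
       pattern on purpose, so any guest touch raises #PF with X86_TRAP_PF_RSVD
       and can be routed straight to the handler without a full walk. */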
542 if (uErr & X86_TRAP_PF_RSVD)
543 {
544 Assert(uErr & X86_TRAP_PF_P);
545 PPGMPAGE pPage;
546# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
547 rc = pgmPhysGetPageEx(pVM, Walk.GCPhys, &pPage);
548 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
549 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
550 pfLockTaken, &Walk, &GstWalk));
551 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
552# else
553 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
554 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
555 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
556 pfLockTaken));
557 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
558# endif
559 AssertRC(rc);
560 PGM_INVL_PG(pVCpu, pvFault);
561 return rc; /* Restart with the corrected entry. */
562 }
563# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
564
565 /*
566 * Fetch the guest PDE, PDPE and PML4E.
567 */
568# if PGM_SHW_TYPE == PGM_TYPE_32BIT
569 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
570 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
571
572# elif PGM_SHW_TYPE == PGM_TYPE_PAE
573 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
574 PX86PDPAE pPDDst;
575# if PGM_GST_TYPE == PGM_TYPE_PAE
576 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
577# else
578 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
579# endif
580 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
581
582# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
583 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
584 PX86PDPAE pPDDst;
585# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
586 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
587 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
588# else
589 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
590# endif
591 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
592
593# elif PGM_SHW_TYPE == PGM_TYPE_EPT
594 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
595 PEPTPD pPDDst;
596 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
597 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
598# endif
599 Assert(pPDDst);
600
601# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
602 /*
603 * Dirty page handling.
604 *
605 * If we successfully correct the write protection fault due to dirty bit
606 * tracking, then return immediately.
607 */
608 if (uErr & X86_TRAP_PF_RW) /* write fault? */
609 {
610 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
611 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
612 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
613 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
614 {
615 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
616 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
617 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
618 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
619 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
620 return VINF_SUCCESS;
621 }
622#ifdef DEBUG_bird
623 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
624 AssertMsg(Walk.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
625#endif
626 }
627
628# if 0 /* rarely useful; leave for debugging. */
629 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
630# endif
631# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
632
633 /*
634 * A common case is the not-present error caused by lazy page table syncing.
635 *
636 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
637 * here so we can safely assume that the shadow PT is present when calling
638 * SyncPage later.
639 *
640 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
641 * of mapping conflict and defer to SyncCR3 in R3.
642 * (Again, we do NOT support access handlers for non-present guest pages.)
643 *
644 */
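    /* Illustrative sequence: the first access to a 2/4MB region of a freshly
       created shadow address space typically faults with P=0 while the guest
       PDE is present; SyncPT below instantiates the shadow page table (and a
       number of its PTEs), so later accesses to the same region no longer
       fault back into this handler. */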
645# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
646 Assert(GstWalk.Pde.u & X86_PDE_P);
647# endif
648 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
649 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
650 {
651 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
652# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
653 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
654 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
655# else
656 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
657 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
658# endif
659 if (RT_SUCCESS(rc))
660 return rc;
661 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
662 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
663 return VINF_PGM_SYNC_CR3;
664 }
665
666 /*
667 * Check if this fault address is flagged for special treatment,
668 * which means we'll have to figure out the physical address and
669 * check flags associated with it.
670 *
671 * ASSUME that we can limit any special access handling to pages
672 * in page tables which the guest believes to be present.
673 */
674# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
675 RTGCPHYS GCPhys = Walk.GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
676# else
677 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
678# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
679 PPGMPAGE pPage;
680 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
681 if (RT_FAILURE(rc))
682 {
683 /*
684 * When the guest accesses invalid physical memory (e.g. probing
685 * of RAM or accessing a remapped MMIO range), then we'll fall
686 * back to the recompiler to emulate the instruction.
687 */
688 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
689 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
690 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
691 return VINF_EM_RAW_EMULATE_INSTR;
692 }
693
694 /*
695 * Any handlers for this page?
696 */
697 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
698# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
699 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
700 &Walk, &GstWalk));
701# else
702 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
703# endif
704
705 /*
706 * We get here only if the page is present in the guest page tables and
707 * the fault is not handled by our handlers.
708 *
709 * Check it for a page out-of-sync situation.
710 */
711 if (!(uErr & X86_TRAP_PF_P))
712 {
713 /*
714 * Page is not present in our page tables. Try to sync it!
715 */
716 if (uErr & X86_TRAP_PF_US)
717 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
718 else /* supervisor */
719 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
720
721 if (PGM_PAGE_IS_BALLOONED(pPage))
722 {
723 /* Emulate reads from ballooned pages as they are not present in
724 our shadow page tables. (Required for e.g. Solaris guests; soft
725 ecc, random nr generator.) */
726 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
727 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
728 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
729 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
730 return rc;
731 }
732
733# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
734 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
735# else
736 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
737# endif
738 if (RT_SUCCESS(rc))
739 {
740 /* The page was successfully synced, return to the guest. */
741 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
742 return VINF_SUCCESS;
743 }
744 }
745 else /* uErr & X86_TRAP_PF_P: */
746 {
747 /*
748 * Write protected pages are made writable when the guest makes the
749 * first write to it. This happens for pages that are shared, write
750 * monitored or not yet allocated.
751 *
752 * We may also end up here when CR0.WP=0 in the guest.
753 *
754 * Also, a side effect of not flushing global PDEs is out-of-sync
755 * pages due to physically monitored regions that are no longer valid.
756 * Assume for now this only applies to the read/write flag.
757 */
758 if (uErr & X86_TRAP_PF_RW)
759 {
760 /*
761 * Check if it is a read-only page.
762 */
763 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
764 {
765 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
766 Assert(!PGM_PAGE_IS_ZERO(pPage));
767 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
768 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
769
770 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
771 if (rc != VINF_SUCCESS)
772 {
773 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
774 return rc;
775 }
776 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
777 return VINF_EM_NO_MEMORY;
778 }
779
780# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
781 /*
782 * Check to see if we need to emulate the instruction if CR0.WP=0.
783 */
784 if ( !(Walk.fEffective & PGM_PTATTRS_W_MASK)
785 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
786 && CPUMGetGuestCPL(pVCpu) < 3)
787 {
788 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
789
790 /*
791 * The Netware WP0+RO+US hack.
792 *
793 * Netware sometimes(/always?) runs with WP0. It has been observed doing
794 * excessive write accesses to pages which are mapped with US=1 and RW=0
795 * while WP=0. This causes a lot of exits and extremely slow execution.
796 * To avoid trapping and emulating every write here, we change the shadow
797 * page table entry to map it as US=0 and RW=1 until user mode tries to
798 * access it again (see further below). We count these shadow page table
799 * changes so we can avoid having to clear the page pool every time the WP
800 * bit changes to 1 (see PGMCr0WpEnabled()).
801 */
802# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
803 if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
804 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
805 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
806 {
807 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, Walk.fBigPage));
808 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, Walk.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
809 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
810 {
811 PGM_INVL_PG(pVCpu, pvFault);
812 pVCpu->pgm.s.cNetwareWp0Hacks++;
813 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
814 return rc;
815 }
816 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
817 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
818 }
819# endif
820
821 /* Interpret the access. */
822 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
823 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), Walk.fBigPage, !!(Walk.fEffective & PGM_PTATTRS_US_MASK)));
824 if (RT_SUCCESS(rc))
825 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
826 else
827 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
828 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
829 return rc;
830 }
831# endif
832 /// @todo count the above case; else
833 if (uErr & X86_TRAP_PF_US)
834 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
835 else /* supervisor */
836 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
837
838 /*
839 * Sync the page.
840 *
841 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
842 * page is not present, which is not true in this case.
843 */
844# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
845 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
846# else
847 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
848# endif
849 if (RT_SUCCESS(rc))
850 {
851 /*
852 * Page was successfully synced, return to guest but invalidate
853 * the TLB first as the page is very likely to be in it.
854 */
855# if PGM_SHW_TYPE == PGM_TYPE_EPT
856 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
857# else
858 PGM_INVL_PG(pVCpu, pvFault);
859# endif
860# ifdef VBOX_STRICT
861 PGMPTWALK GstPageWalk;
862 GstPageWalk.GCPhys = RTGCPHYS_MAX;
863 if (!pVM->pgm.s.fNestedPaging)
864 {
865 rc = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
866 AssertMsg(RT_SUCCESS(rc) && ((GstPageWalk.fEffective & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, GstPageWalk.fEffective));
867 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GstPageWalk.GCPhys, GstPageWalk.fEffective));
868 }
869# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
870 uint64_t fPageShw = 0;
871 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
872 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
873 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GstPageWalk.GCPhys, fPageGst, pvFault));
874# endif
875# endif /* VBOX_STRICT */
876 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
877 return VINF_SUCCESS;
878 }
879 }
880# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
881 /*
882 * Check for Netware WP0+RO+US hack from above and undo it when user
883 * mode accesses the page again.
884 */
885 else if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
886 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
887 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
888 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
889 && CPUMGetGuestCPL(pVCpu) == 3
890 && pVM->cCpus == 1
891 )
892 {
893 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
894 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
895 if (RT_SUCCESS(rc))
896 {
897 PGM_INVL_PG(pVCpu, pvFault);
898 pVCpu->pgm.s.cNetwareWp0Hacks--;
899 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
900 return VINF_SUCCESS;
901 }
902 }
903# endif /* PGM_WITH_PAGING */
904
905 /** @todo else: why are we here? */
906
907# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
908 /*
909 * Check for VMM page flags vs. Guest page flags consistency.
910 * Currently only for debug purposes.
911 */
912 if (RT_SUCCESS(rc))
913 {
914 /* Get guest page flags. */
915 PGMPTWALK GstPageWalk;
916 int rc2 = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
917 if (RT_SUCCESS(rc2))
918 {
919 uint64_t fPageShw = 0;
920 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
921
922#if 0
923 /*
924 * Compare page flags.
925 * Note: we have AVL, A, D bits desynced.
926 */
927 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
928 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
929 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
930 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
931 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
932 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
933 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
934 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
935 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
936 01:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
937 01:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
938
939 01:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
940 01:01:15.625516 00:08:43.268051 Location :
941e:\vbox\svn\trunk\srcPage flags mismatch!
942pvFault=fffff801b0d7b000
943 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
944GCPhys=0000000019b52000
945fPageShw=0
946fPageGst=77b0000000000121
947rc=0
948#endif
949
950 }
951 else
952 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
953 }
954 else
955 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
956# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
957 }
958
959
960 /*
961 * If we get here it is because something failed above, i.e. most likely guru
962 * meditation time.
963 */
964 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
965 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs.Sel, pRegFrame->rip));
966 return rc;
967
968# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
969 NOREF(uErr); NOREF(pRegFrame); NOREF(pvFault);
970 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
971 return VERR_PGM_NOT_USED_IN_MODE;
972# endif
973}
974
975
976/**
977 * Nested \#PF handler for nested-guest hardware-assisted execution using nested
978 * paging.
979 *
980 * @returns VBox status code (appropriate for trap handling and GC return).
981 * @param pVCpu The cross context virtual CPU structure.
982 * @param uErr The fault error (X86_TRAP_PF_*).
983 * @param pRegFrame The register frame.
984 * @param GCPhysNested The nested-guest physical address being accessed.
985 * @param fIsLinearAddrValid Whether translation of a nested-guest linear address
986 * caused this fault. If @c false, GCPtrNested must be
987 * 0.
988 * @param GCPtrNested The nested-guest linear address that caused this
989 * fault.
990 * @param pWalk The guest page table walk result.
991 * @param pfLockTaken Where to store whether the PGM lock is still held
992 * when this function completes.
993 */
994PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysNested,
995 bool fIsLinearAddrValid, RTGCPTR GCPtrNested, PPGMPTWALK pWalk, bool *pfLockTaken)
996{
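    /* Rough picture of the nested case handled here: GCPhysNested is first
       translated through the EPT tables set up by the guest hypervisor
       (pgmGstSlatWalk below) into a true guest-physical address, and that
       address is then backed by our shadow EPT tables, so both translation
       levels are honoured on the real hardware. */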
997 *pfLockTaken = false;
998# if defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT) \
999 && ( PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_32BIT \
1000 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1001 && PGM_SHW_TYPE == PGM_TYPE_EPT
1002
1003 Assert(CPUMIsGuestVmxEptPagingEnabled(pVCpu));
1004
1005 /*
1006 * Walk the guest EPT tables and check if it's an EPT violation or misconfiguration.
1007 */
1008 PGMPTWALKGST GstWalkAll;
1009 int rc = pgmGstSlatWalk(pVCpu, GCPhysNested, fIsLinearAddrValid, GCPtrNested, pWalk, &GstWalkAll);
1010 if (RT_FAILURE(rc))
1011 return rc;
1012
1013 Assert(GstWalkAll.enmType == PGMPTWALKGSTTYPE_EPT);
1014 Assert(pWalk->fSucceeded);
1015 Assert(pWalk->fEffective & PGM_PTATTRS_R_MASK);
1016 Assert(pWalk->fIsSlat);
1017
1018 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
1019 {
1020 if ( ( (uErr & X86_TRAP_PF_RW)
1021 && !(pWalk->fEffective & PGM_PTATTRS_W_MASK)
1022 && ( (uErr & X86_TRAP_PF_US)
1023 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
1024 || ((uErr & X86_TRAP_PF_US) && !(pWalk->fEffective & PGM_PTATTRS_US_MASK))
1025 || ((uErr & X86_TRAP_PF_ID) && (pWalk->fEffective & PGM_PTATTRS_NX_MASK))
1026 )
1027 return VERR_ACCESS_DENIED;
1028 }
1029
1030 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1031 RTGCPHYS const GCPhysFault = PGM_A20_APPLY(pVCpu, GCPhysNested & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1032 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
1033
1034 /* Take the big lock now. */
1035 *pfLockTaken = true;
1036 PGM_LOCK_VOID(pVM);
1037
1038 /*
1039 * Check if this is an APIC-access page access (VMX specific).
1040 */
1041 RTGCPHYS const GCPhysApicAccess = CPUMGetGuestVmxApicAccessPageAddr(pVCpu);
1042 if ((pWalk->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysApicAccess)
1043 {
1044 PPGMPAGE pPage;
1045 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPhysApicAccess), &pPage);
1046 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1047 {
1048 rc = VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pWalk->GCPhys, pPage,
1049 pfLockTaken));
1050 return rc;
1051 }
1052 }
1053
1054# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1055 /*
1056 * Check if this is an MMIO access.
1057 */
1058 if (uErr & X86_TRAP_PF_RSVD)
1059 {
1060 PPGMPAGE pPage;
1061 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)GCPhysFault), &pPage);
1062 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1063 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, GCPhysFault, pPage,
1064 pfLockTaken));
1065 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, 1, uErr);
1066 AssertRC(rc);
1067 HMInvalidatePhysPage(pVM, GCPhysFault);
1068 return rc; /* Restart with the corrected entry. */
1069 }
1070# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1071
1072 /*
1073 * Fetch the guest EPT page directory pointer.
1074 */
1075 const unsigned iPDDst = ((GCPhysFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
1076 PEPTPD pPDDst;
1077 rc = pgmShwGetEPTPDPtr(pVCpu, GCPhysFault, NULL /* ppPdpt */, &pPDDst);
1078 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
1079 Assert(pPDDst);
1080
1081 /*
1082 * A common case is the not-present error caused by lazy page table syncing.
1083 *
1084 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
1085 * here so we can safely assume that the shadow PT is present when calling
1086 * SyncPage later.
1087 *
1088 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
1089 * of mapping conflict and defer to SyncCR3 in R3.
1090 * (Again, we do NOT support access handlers for non-present guest pages.)
1091 *
1092 */
1093 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
1094 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
1095 {
1096 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
1097 LogFlow(("=>SyncPT GCPhysFault=%RGp\n", GCPhysFault));
1098 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0 /* iPDSrc */, NULL /* pPDSrc */, GCPhysFault);
1099 if (RT_SUCCESS(rc))
1100 return rc;
1101 Log(("SyncPT: %RGp failed!! rc=%Rrc\n", GCPhysFault, rc));
1102 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1103 return VINF_PGM_SYNC_CR3;
1104 }
1105
1106 /*
1107 * Check if this fault address is flagged for special treatment,
1108 * which means we'll have to figure out the physical address and
1109 * check flags associated with it.
1110 *
1111 * ASSUME that we can limit any special access handling to pages
1112 * in page tables which the guest believes to be present.
1113 */
1114 PPGMPAGE pPage;
1115 rc = pgmPhysGetPageEx(pVM, GCPhysFault, &pPage);
1116 if (RT_FAILURE(rc))
1117 {
1118 /*
1119 * When the guest accesses invalid physical memory (e.g. probing
1120 * of RAM or accessing a remapped MMIO range), then we'll fall
1121 * back to the recompiler to emulate the instruction.
1122 */
1123 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhysFault, rc));
1124 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
1125 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
1126 return VINF_EM_RAW_EMULATE_INSTR;
1127 }
1128
1129 /*
1130 * Any handlers for this page?
1131 */
1132 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1133 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, GCPhysFault, pPage,
1134 pfLockTaken));
1135
1136 /*
1137 * We get here only if the page is present in the guest page tables and
1138 * the fault is not handled by our handlers.
1139 *
1140 * Check it for a page out-of-sync situation.
1141 */
1142 if (!(uErr & X86_TRAP_PF_P))
1143 {
1144 /*
1145 * Page is not present in our page tables. Try to sync it!
1146 */
1147 if (uErr & X86_TRAP_PF_US)
1148 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
1149 else /* supervisor */
1150 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
1151
1152 if (PGM_PAGE_IS_BALLOONED(pPage))
1153 {
1154 /* Emulate reads from ballooned pages as they are not present in
1155 our shadow page tables. (Required for e.g. Solaris guests; soft
1156 ecc, random nr generator.) */
1157 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, GCPhysFault));
1158 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
1159 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
1160 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
1161 return rc;
1162 }
1163
1164 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, PGM_SYNC_NR_PAGES, uErr);
1165 if (RT_SUCCESS(rc))
1166 {
1167 /* The page was successfully synced, return to the guest. */
1168 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
1169 return VINF_SUCCESS;
1170 }
1171 }
1172 else
1173 {
1174 /*
1175 * Write protected pages are made writable when the guest makes the
1176 * first write to it. This happens for pages that are shared, write
1177 * monitored or not yet allocated.
1178 *
1179 * We may also end up here when CR0.WP=0 in the guest.
1180 *
1181 * Also, a side effect of not flushing global PDEs is out-of-sync
1182 * pages due to physically monitored regions that are no longer valid.
1183 * Assume for now this only applies to the read/write flag.
1184 */
1185 if (uErr & X86_TRAP_PF_RW)
1186 {
1187 /*
1188 * Check if it is a read-only page.
1189 */
1190 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1191 {
1192 Assert(!PGM_PAGE_IS_ZERO(pPage));
1193 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhysFault));
1194 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
1195
1196 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysFault);
1197 if (rc != VINF_SUCCESS)
1198 {
1199 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
1200 return rc;
1201 }
1202 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
1203 return VINF_EM_NO_MEMORY;
1204 }
1205
1206 if (uErr & X86_TRAP_PF_US)
1207 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
1208 else
1209 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
1210
1211 /*
1212 * Sync the page.
1213 *
1214 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1215 * page is not present, which is not true in this case.
1216 */
1217 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, 1, uErr);
1218 if (RT_SUCCESS(rc))
1219 {
1220 /*
1221 * Page was successfully synced, return to guest but invalidate
1222 * the TLB first as the page is very likely to be in it.
1223 */
1224 HMInvalidatePhysPage(pVM, GCPhysFault);
1225 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
1226 return VINF_SUCCESS;
1227 }
1228 }
1229 }
1230
1231 /*
1232 * If we get here it is because something failed above, i.e. most likely guru meditation time.
1233 */
1234 LogRelFunc(("returns rc=%Rrc GCPhysFault=%RGp uErr=%RX64 cs:rip=%04x:%08RX64\n", rc, GCPhysFault, (uint64_t)uErr,
1235 pRegFrame->cs.Sel, pRegFrame->rip));
1236 return rc;
1237
1238# else
1239 RT_NOREF7(pVCpu, uErr, pRegFrame, GCPhysNested, fIsLinearAddrValid, GCPtrNested, pWalk);
1240 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1241 return VERR_PGM_NOT_USED_IN_MODE;
1242# endif
1243}
1244
1245#endif /* !IN_RING3 */
1246
1247
1248/**
1249 * Emulation of the invlpg instruction.
1250 *
1251 *
1252 * @returns VBox status code.
1253 *
1254 * @param pVCpu The cross context virtual CPU structure.
1255 * @param GCPtrPage Page to invalidate.
1256 *
1257 * @remark ASSUMES that the guest is updating before invalidating. This order
1258 * isn't required by the CPU, so this is speculative and could cause
1259 * trouble.
1260 * @remark No TLB shootdown is done on any other VCPU as we assume that
1261 * invlpg emulation is the *only* reason for calling this function.
1262 * (The guest has to shoot down TLB entries on other CPUs itself)
1263 * Currently true, but keep in mind!
1264 *
1265 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1266 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1267 */
1268PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
1269{
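    /* Call-path sketch (assumption, based on the mode dispatch in PGMAll.cpp):
       PGMInvalidatePage() is the usual entry point for emulated INVLPG; it
       acquires the PGM lock and forwards to this per-mode worker, which is why
       the code below only asserts lock ownership instead of taking the lock. */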
1270#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1271 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
1272 && PGM_SHW_TYPE != PGM_TYPE_NONE
1273 int rc;
1274 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1275 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1276
1277 PGM_LOCK_ASSERT_OWNER(pVM);
1278
1279 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1280
1281 /*
1282 * Get the shadow PD entry and skip out if this PD isn't present.
1283 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1284 */
1285# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1286 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1287 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1288
1289 /* Fetch the pgm pool shadow descriptor. */
1290 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1291# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1292 if (!pShwPde)
1293 {
1294 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1295 return VINF_SUCCESS;
1296 }
1297# else
1298 Assert(pShwPde);
1299# endif
1300
1301# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1302 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1303 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1304
1305 /* If the shadow PDPE isn't present, then skip the invalidate. */
1306# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1307 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1308# else
1309 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1310# endif
1311 {
1312 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1313 PGM_INVL_PG(pVCpu, GCPtrPage);
1314 return VINF_SUCCESS;
1315 }
1316
1317 /* Fetch the pgm pool shadow descriptor. */
1318 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1319 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1320
1321 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1322 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1323 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1324
1325# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1326 /* PML4 */
1327 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1328 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1329 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1330 PX86PDPAE pPDDst;
1331 PX86PDPT pPdptDst;
1332 PX86PML4E pPml4eDst;
1333 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1334 if (rc != VINF_SUCCESS)
1335 {
1336 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1337 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1338 PGM_INVL_PG(pVCpu, GCPtrPage);
1339 return VINF_SUCCESS;
1340 }
1341 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1342 Assert(pPDDst);
1343 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1344
1345 /* Fetch the pgm pool shadow descriptor. */
1346 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1347 Assert(pShwPde);
1348
1349# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1350
1351 const SHWPDE PdeDst = *pPdeDst;
1352 if (!(PdeDst.u & X86_PDE_P))
1353 {
1354 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1355 PGM_INVL_PG(pVCpu, GCPtrPage);
1356 return VINF_SUCCESS;
1357 }
1358
1359 /*
1360 * Get the guest PD entry and calc big page.
1361 */
1362# if PGM_GST_TYPE == PGM_TYPE_32BIT
1363 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1364 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1365 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1366# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1367 unsigned iPDSrc = 0;
1368# if PGM_GST_TYPE == PGM_TYPE_PAE
1369 X86PDPE PdpeSrcIgn;
1370 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1371# else /* AMD64 */
1372 PX86PML4E pPml4eSrcIgn;
1373 X86PDPE PdpeSrcIgn;
1374 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1375# endif
1376 GSTPDE PdeSrc;
1377
1378 if (pPDSrc)
1379 PdeSrc = pPDSrc->a[iPDSrc];
1380 else
1381 PdeSrc.u = 0;
1382# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1383 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1384 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1385 if (fWasBigPage != fIsBigPage)
1386 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1387
1388# ifdef IN_RING3
1389 /*
1390 * If a CR3 Sync is pending we may ignore the invalidate page operation
1391 * depending on the kind of sync and if it's a global page or not.
1392 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1393 */
1394# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1395 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1396 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1397 && fIsBigPage
1398 && (PdeSrc.u & X86_PDE4M_G)
1399 )
1400 )
1401# else
1402 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1403# endif
1404 {
1405 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1406 return VINF_SUCCESS;
1407 }
1408# endif /* IN_RING3 */
1409
1410 /*
1411 * Deal with the Guest PDE.
1412 */
1413 rc = VINF_SUCCESS;
1414 if (PdeSrc.u & X86_PDE_P)
1415 {
1416 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1417 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1418 if (!fIsBigPage)
1419 {
1420 /*
1421 * 4KB - page.
1422 */
1423 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1424 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1425
1426# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1427 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1428 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
1429# endif
1430 if (pShwPage->GCPhys == GCPhys)
1431 {
1432 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1433 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1434
1435 PGSTPT pPTSrc;
1436 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1437 if (RT_SUCCESS(rc))
1438 {
1439 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1440 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1441 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1442 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1443 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1444 GCPtrPage, PteSrc.u & X86_PTE_P,
1445 (PteSrc.u & PdeSrc.u & X86_PTE_RW),
1446 (PteSrc.u & PdeSrc.u & X86_PTE_US),
1447 (uint64_t)PteSrc.u,
1448 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1449 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1450 }
1451 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1452 PGM_INVL_PG(pVCpu, GCPtrPage);
1453 }
1454 else
1455 {
1456 /*
1457 * The page table address changed.
1458 */
1459 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1460 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1461 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1462 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1463 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1464 PGM_INVL_VCPU_TLBS(pVCpu);
1465 }
1466 }
1467 else
1468 {
1469 /*
1470 * 2/4MB - page.
1471 */
1472 /* Before freeing the page, check if anything really changed. */
1473 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1474 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1475# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1476 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1477 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1478# endif
1479 if ( pShwPage->GCPhys == GCPhys
1480 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1481 {
1482 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1483 /** @todo This test is wrong as it cannot check the G bit!
1484 * FIXME */
1485 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1486 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1487 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1488 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1489 {
1490 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1491 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1492 return VINF_SUCCESS;
1493 }
1494 }
1495
1496 /*
1497 * Ok, the page table is present and it's been changed in the guest.
1498 * If we're in host context, we'll just mark it as not present, taking the lazy approach.
1499 * We could do this for some flushes in GC too, but we need an algorithm for
1500 * deciding which 4MB pages contain code that is likely to be executed very soon.
1501 */
1502 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1503 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1504 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1505 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1506 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1507 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1508 }
1509 }
1510 else
1511 {
1512 /*
1513 * Page directory is not present, mark shadow PDE not present.
1514 */
1515 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1516 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1517 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1518 PGM_INVL_PG(pVCpu, GCPtrPage);
1519 }
1520 return rc;
1521
1522#else /* guest real and protected mode, nested + ept, none. */
1523 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1524 NOREF(pVCpu); NOREF(GCPtrPage);
1525 return VINF_SUCCESS;
1526#endif
1527}
1528
1529#if PGM_SHW_TYPE != PGM_TYPE_NONE
1530
1531/**
1532 * Update the tracking of shadowed pages.
1533 *
1534 * @param pVCpu The cross context virtual CPU structure.
1535 * @param pShwPage The shadow page.
1536 * @param HCPhys The physical page that is being dereferenced.
1537 * @param iPte Shadow PTE index
1538 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1539 */
1540DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1541 RTGCPHYS GCPhysPage)
1542{
1543 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1544
1545# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1546 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1547 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1548
1549 /* Use the hint we retrieved from the cached guest PT. */
1550 if (pShwPage->fDirty)
1551 {
1552 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1553
1554 Assert(pShwPage->cPresent);
1555 Assert(pPool->cPresent);
1556 pShwPage->cPresent--;
1557 pPool->cPresent--;
1558
1559 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1560 AssertRelease(pPhysPage);
1561 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1562 return;
1563 }
1564# else
1565 NOREF(GCPhysPage);
1566# endif
1567
1568 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1569 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1570
1571 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1572 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1573 * 2. write protect all shadowed pages. I.e. implement caching.
1574 */
1575 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1576
1577 /*
1578 * Find the guest address.
1579 */
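    /* Brute force fallback: walk every page of every RAM range looking for a matching
       host physical address (this is the potential bottleneck the @todo above refers to). */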
1580 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1581 pRam;
1582 pRam = pRam->CTX_SUFF(pNext))
1583 {
1584 unsigned iPage = pRam->cb >> GUEST_PAGE_SHIFT;
1585 while (iPage-- > 0)
1586 {
1587 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1588 {
1589 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1590
1591 Assert(pShwPage->cPresent);
1592 Assert(pPool->cPresent);
1593 pShwPage->cPresent--;
1594 pPool->cPresent--;
1595
1596 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1597 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1598 return;
1599 }
1600 }
1601 }
1602
1603 for (;;)
1604 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1605}
1606
1607
1608/**
1609 * Update the tracking of shadowed pages.
1610 *
1611 * @param pVCpu The cross context virtual CPU structure.
1612 * @param pShwPage The shadow page.
1613 * @param u16 The top 16-bit of the pPage->HCPhys.
1614 * @param pPage Pointer to the guest page. This will be modified.
1615 * @param iPTDst The index into the shadow table.
1616 */
1617DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16,
1618 PPGMPAGE pPage, const unsigned iPTDst)
1619{
1620 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1621
1622 /*
1623 * Just deal with the simple first time here.
1624 */
1625 if (!u16)
1626 {
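        /* First reference to this guest page: record a single reference together with the
           owning shadow page's pool index in the 16-bit tracking word. */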
1627 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1628 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1629 /* Save the page table index. */
1630 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1631 }
1632 else
1633 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1634
1635 /* write back */
1636 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1637 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1638
1639 /* update statistics. */
1640 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1641 pShwPage->cPresent++;
1642 if (pShwPage->iFirstPresent > iPTDst)
1643 pShwPage->iFirstPresent = iPTDst;
1644}
1645
1646
1647/**
1648 * Modifies a shadow PTE to account for access handlers.
1649 *
1650 * @param pVM The cross context VM structure.
1651 * @param pPage The page in question.
1652 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1653 * A (accessed) bit so it can be emulated correctly.
1654 * @param pPteDst The shadow PTE (output). This is temporary storage and
1655 * does not need to be set atomically.
1656 */
1657DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1658{
1659 NOREF(pVM); RT_NOREF_PV(fPteSrc);
1660
1661 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1662 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1663 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1664 {
1665 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1666# if PGM_SHW_TYPE == PGM_TYPE_EPT
1667 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1668# else
1669 if (fPteSrc & X86_PTE_A)
1670 {
1671 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1672 SHW_PTE_SET_RO(*pPteDst);
1673 }
1674 else
1675 SHW_PTE_SET(*pPteDst, 0);
1676# endif
1677 }
1678# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1679# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1680 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1681 && ( BTH_IS_NP_ACTIVE(pVM)
1682 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1683# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1684 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1685# endif
1686 )
1687 {
1688 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1689# if PGM_SHW_TYPE == PGM_TYPE_EPT
1690 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1691 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1692 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1693 | EPT_E_WRITE
1694 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1695 | EPT_E_MEMTYPE_INVALID_3;
1696# else
1697 /* Set high page frame bits that must be zero (fixed mask on PAE, CPU dependent on AMD64). */
1698 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1699# endif
1700 }
1701# endif
1702# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1703 else
1704 {
1705 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1706 SHW_PTE_SET(*pPteDst, 0);
1707 }
1708 /** @todo count these kinds of entries. */
1709}
1710
1711
1712/**
1713 * Creates a 4K shadow page for a guest page.
1714 *
1715 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1716 * physical address. Only the flags of the PdeSrc argument are used. No page
1717 * structures will be mapped in this function.
1718 *
1719 * @param pVCpu The cross context virtual CPU structure.
1720 * @param pPteDst Destination page table entry.
1721 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1722 * Can safely assume that only the flags are being used.
1723 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1724 * @param pShwPage Pointer to the shadow page.
1725 * @param iPTDst The index into the shadow table.
1726 *
1727 * @remark Not used for 2/4MB pages!
1728 */
1729# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1730static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1731 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1732# else
1733static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1734 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1735# endif
1736{
1737 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1738 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1739
1740# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1741 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1742 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1743
1744 if (pShwPage->fDirty)
1745 {
1746 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1747 PGSTPT pGstPT;
1748
1749 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1750 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1751 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1752 pGstPT->a[iPTDst].u = PteSrc.u;
1753 }
1754# else
1755 Assert(!pShwPage->fDirty);
1756# endif
1757
1758# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1759 if ( (PteSrc.u & X86_PTE_P)
1760 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1761# endif
1762 {
1763# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1764 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1765# endif
1766 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1767
1768 /*
1769 * Find the ram range.
1770 */
1771 PPGMPAGE pPage;
1772 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1773 if (RT_SUCCESS(rc))
1774 {
1775 /* Ignore ballooned pages.
1776 Don't return errors or use a fatal assert here as part of a
1777 shadow sync range might include ballooned pages. */
1778 if (PGM_PAGE_IS_BALLOONED(pPage))
1779 {
1780 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1781 return;
1782 }
1783
1784# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1785 /* Make the page writable if necessary. */
1786 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1787 && ( PGM_PAGE_IS_ZERO(pPage)
1788# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1789 || ( (PteSrc.u & X86_PTE_RW)
1790# else
1791 || ( 1
1792# endif
1793 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1794# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1795 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1796# endif
1797# ifdef VBOX_WITH_PAGE_SHARING
1798 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1799# endif
1800 )
1801 )
1802 )
1803 {
1804 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1805 AssertRC(rc);
1806 }
1807# endif
1808
1809 /*
1810 * Make page table entry.
1811 */
1812 SHWPTE PteDst;
1813# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1814 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1815# else
1816 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1817# endif
1818 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1819 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1820 else
1821 {
1822# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1823 /*
1824 * If the page or page directory entry is not marked accessed,
1825 * we mark the page not present.
1826 */
1827 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1828 {
1829 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1830 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1831 SHW_PTE_SET(PteDst, 0);
1832 }
1833 /*
1834 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1835 * when the page is modified.
1836 */
1837 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
1838 {
1839 AssertCompile(X86_PTE_RW == X86_PDE_RW);
1840 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
1841 SHW_PTE_SET(PteDst,
1842 fGstShwPteFlags
1843 | PGM_PAGE_GET_HCPHYS(pPage)
1844 | PGM_PTFLAGS_TRACK_DIRTY);
1845 SHW_PTE_SET_RO(PteDst);
1846 }
1847 else
1848# endif
1849 {
1850 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
1851# if PGM_SHW_TYPE == PGM_TYPE_EPT
1852 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
1853 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1854# else
1855 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1856# endif
1857 }
1858
1859 /*
1860 * Make sure only allocated pages are mapped writable.
1861 */
1862 if ( SHW_PTE_IS_P_RW(PteDst)
1863 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1864 {
1865 /* Still applies to shared pages. */
1866 Assert(!PGM_PAGE_IS_ZERO(pPage));
1867 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why, isn't it? */
1868 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1869 }
1870 }
1871
1872 /*
1873 * Keep user track up to date.
1874 */
1875 if (SHW_PTE_IS_P(PteDst))
1876 {
1877 if (!SHW_PTE_IS_P(*pPteDst))
1878 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1879 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1880 {
1881 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1882 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1883 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1884 }
1885 }
1886 else if (SHW_PTE_IS_P(*pPteDst))
1887 {
1888 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1889 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1890 }
1891
1892 /*
1893 * Update statistics and commit the entry.
1894 */
1895# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1896 if (!(PteSrc.u & X86_PTE_G))
1897 pShwPage->fSeenNonGlobal = true;
1898# endif
1899 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1900 return;
1901 }
1902
1903/** @todo count these three different kinds. */
1904 Log2(("SyncPageWorker: invalid address in Pte\n"));
1905 }
1906# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1907 else if (!(PteSrc.u & X86_PTE_P))
1908 Log2(("SyncPageWorker: page not present in Pte\n"));
1909 else
1910 Log2(("SyncPageWorker: invalid Pte\n"));
1911# endif
1912
1913 /*
1914 * The page is not present or the PTE is bad. Replace the shadow PTE by
1915 * an empty entry, making sure to keep the user tracking up to date.
1916 */
1917 if (SHW_PTE_IS_P(*pPteDst))
1918 {
1919 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1920 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1921 }
1922 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1923}
1924
1925
1926/**
1927 * Syncs a guest OS page.
1928 *
1929 * There are no conflicts at this point, neither is there any need for
1930 * page table allocations.
1931 *
1932 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1933 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1934 *
1935 * @returns VBox status code.
1936 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1937 * @param pVCpu The cross context virtual CPU structure.
1938 * @param PdeSrc Page directory entry of the guest.
1939 * @param GCPtrPage Guest context page address.
1940 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1941 * @param uErr Fault error (X86_TRAP_PF_*).
1942 */
1943static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1944{
1945 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1946 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
1947 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1948 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
1949
1950 PGM_LOCK_ASSERT_OWNER(pVM);
1951
1952# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1953 || PGM_GST_TYPE == PGM_TYPE_PAE \
1954 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1955 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
1956
1957 /*
1958 * Assert preconditions.
1959 */
1960 Assert(PdeSrc.u & X86_PDE_P);
1961 Assert(cPages);
1962# if 0 /* rarely useful; leave for debugging. */
1963 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1964# endif
1965
1966 /*
1967 * Get the shadow PDE, find the shadow page table in the pool.
1968 */
1969# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1970 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1971 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1972
1973 /* Fetch the pgm pool shadow descriptor. */
1974 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1975 Assert(pShwPde);
1976
1977# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1978 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1979 PPGMPOOLPAGE pShwPde = NULL;
1980 PX86PDPAE pPDDst;
1981
1982 /* Fetch the pgm pool shadow descriptor. */
1983 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1984 AssertRCSuccessReturn(rc2, rc2);
1985 Assert(pShwPde);
1986
1987 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1988 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1989
1990# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1991 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1992 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1993 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1994 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1995
1996 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1997 AssertRCSuccessReturn(rc2, rc2);
1998 Assert(pPDDst && pPdptDst);
1999 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
2000# endif
2001 SHWPDE PdeDst = *pPdeDst;
2002
2003 /*
2004 * - In the guest SMP case we could have blocked while another VCPU reused
2005 * this page table.
2006 * - With W7-64 we may also take this path when the A bit is cleared on
2007 * higher level tables (PDPE/PML4E). The guest does not invalidate the
2008 * relevant TLB entries. If we're write monitoring any page mapped by
2009 * the modified entry, we may end up here with a "stale" TLB entry.
2010 */
2011 if (!(PdeDst.u & X86_PDE_P))
2012 {
2013 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2014 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
2015 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2016 if (uErr & X86_TRAP_PF_P)
2017 PGM_INVL_PG(pVCpu, GCPtrPage);
2018 return VINF_SUCCESS; /* force the instruction to be executed again. */
2019 }
2020
2021 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2022 Assert(pShwPage);
2023
2024# if PGM_GST_TYPE == PGM_TYPE_AMD64
2025 /* Fetch the pgm pool shadow descriptor. */
2026 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2027 Assert(pShwPde);
2028# endif
2029
2030 /*
2031 * Check that the page is present and that the shadow PDE isn't out of sync.
2032 */
2033 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
2034 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
2035 RTGCPHYS GCPhys;
2036 if (!fBigPage)
2037 {
2038 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2039# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2040 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2041 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2042# endif
2043 }
2044 else
2045 {
2046 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2047# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2048 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2049 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2050# endif
2051 }
2052 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
2053 if ( fPdeValid
2054 && pShwPage->GCPhys == GCPhys
2055 && (PdeSrc.u & X86_PDE_P)
2056 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
2057 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
2058# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2059 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
2060# endif
2061 )
2062 {
2063 /*
2064 * Check that the PDE is marked accessed already.
2065 * Since we set the accessed bit *before* getting here on a #PF, this
2066 * check is only meant for dealing with non-#PF'ing paths.
2067 */
2068 if (PdeSrc.u & X86_PDE_A)
2069 {
2070 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2071 if (!fBigPage)
2072 {
2073 /*
2074 * 4KB Page - Map the guest page table.
2075 */
2076 PGSTPT pPTSrc;
2077 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2078 if (RT_SUCCESS(rc))
2079 {
2080# ifdef PGM_SYNC_N_PAGES
2081 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2082 if ( cPages > 1
2083 && !(uErr & X86_TRAP_PF_P)
2084 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2085 {
2086 /*
2087 * This code path is currently only taken when the caller is PGMTrap0eHandler
2088 * for non-present pages!
2089 *
2090 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2091 * deal with locality.
2092 */
2093 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2094# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2095 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2096 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2097# else
2098 const unsigned offPTSrc = 0;
2099# endif
2100 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2101 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2102 iPTDst = 0;
2103 else
2104 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2105
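                        /* Sync every entry in the window that is present in the guest PT
                           but not yet present in the shadow PT. */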
2106 for (; iPTDst < iPTDstEnd; iPTDst++)
2107 {
2108 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
2109
2110 if ( (pPteSrc->u & X86_PTE_P)
2111 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2112 {
2113 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT))
2114 | ((offPTSrc + iPTDst) << GUEST_PAGE_SHIFT);
2115 NOREF(GCPtrCurPage);
2116 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
2117 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2118 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
2119 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
2120 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
2121 (uint64_t)pPteSrc->u,
2122 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2123 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2124 }
2125 }
2126 }
2127 else
2128# endif /* PGM_SYNC_N_PAGES */
2129 {
2130 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
2131 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2132 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2133 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2134 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
2135 GCPtrPage, PteSrc.u & X86_PTE_P,
2136 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2137 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2138 (uint64_t)PteSrc.u,
2139 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2140 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2141 }
2142 }
2143 else /* MMIO or invalid page: emulated in #PF handler. */
2144 {
2145 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
2146 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
2147 }
2148 }
2149 else
2150 {
2151 /*
2152 * 4/2MB page - lazy syncing shadow 4K pages.
2153 * (There are many causes of getting here; it's no longer only CSAM.)
2154 */
2155 /* Calculate the GC physical address of this 4KB shadow page. */
2156 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
2157 /* Find ram range. */
2158 PPGMPAGE pPage;
2159 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
2160 if (RT_SUCCESS(rc))
2161 {
2162 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2163
2164# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2165 /* Try to make the page writable if necessary. */
2166 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2167 && ( PGM_PAGE_IS_ZERO(pPage)
2168 || ( (PdeSrc.u & X86_PDE_RW)
2169 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2170# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2171 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2172# endif
2173# ifdef VBOX_WITH_PAGE_SHARING
2174 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2175# endif
2176 )
2177 )
2178 )
2179 {
2180 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2181 AssertRC(rc);
2182 }
2183# endif
2184
2185 /*
2186 * Make shadow PTE entry.
2187 */
2188 SHWPTE PteDst;
2189 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2190 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
2191 else
2192 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
2193
2194 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2195 if ( SHW_PTE_IS_P(PteDst)
2196 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2197 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2198
2199 /* Make sure only allocated pages are mapped writable. */
2200 if ( SHW_PTE_IS_P_RW(PteDst)
2201 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2202 {
2203 /* Still applies to shared pages. */
2204 Assert(!PGM_PAGE_IS_ZERO(pPage));
2205 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2206 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2207 }
2208
2209 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
2210
2211 /*
2212 * If the page is not flagged as dirty and is writable, then make it read-only
2213 * at PD level, so we can set the dirty bit when the page is modified.
2214 *
2215 * ASSUMES that page access handlers are implemented on page table entry level.
2216 * Thus we will first catch the dirty access and set PDE.D and restart. If
2217 * there is an access handler, we'll trap again and let it work on the problem.
2218 */
2219 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2220 * As for invlpg, it simply frees the whole shadow PT.
2221 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2222 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
2223 {
2224 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2225 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2226 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2227 }
2228 else
2229 {
2230 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
2231 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
2232 }
2233 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2234 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2235 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
2236 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2237 }
2238 else
2239 {
2240 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2241 /** @todo must wipe the shadow page table entry in this
2242 * case. */
2243 }
2244 }
2245 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2246 return VINF_SUCCESS;
2247 }
2248
2249 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
2250 }
2251 else if (fPdeValid)
2252 {
2253 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2254 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2255 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2256 }
2257 else
2258 {
2259/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
2260 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2261 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2262 }
2263
2264 /*
2265 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2266 * Yea, I'm lazy.
2267 */
2268 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2269 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
2270
2271 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2272 PGM_INVL_VCPU_TLBS(pVCpu);
2273 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2274
2275
2276# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2277 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2278 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2279 NOREF(PdeSrc);
2280
2281# ifdef PGM_SYNC_N_PAGES
2282 /*
2283 * Get the shadow PDE, find the shadow page table in the pool.
2284 */
2285# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2286 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2287
2288# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2289 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2290
2291# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2292 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2293 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2294 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2295 X86PDEPAE PdeDst;
2296 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2297
2298 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2299 AssertRCSuccessReturn(rc, rc);
2300 Assert(pPDDst && pPdptDst);
2301 PdeDst = pPDDst->a[iPDDst];
2302
2303# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2304 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2305 PEPTPD pPDDst;
2306 EPTPDE PdeDst;
2307
2308 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2309 if (rc != VINF_SUCCESS)
2310 {
2311 AssertRC(rc);
2312 return rc;
2313 }
2314 Assert(pPDDst);
2315 PdeDst = pPDDst->a[iPDDst];
2316# endif
2317 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2318 if (!SHW_PDE_IS_P(PdeDst))
2319 {
2320 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2321 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2322 return VINF_SUCCESS; /* force the instruction to be executed again. */
2323 }
2324
2325 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2326 if (SHW_PDE_IS_BIG(PdeDst))
2327 {
2328 Assert(pVM->pgm.s.fNestedPaging);
2329 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2330 return VINF_SUCCESS;
2331 }
2332
2333 /* Mask away the page offset. */
2334 GCPtrPage &= ~((RTGCPTR)0xfff);
2335
2336 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2337 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2338
2339 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2340 if ( cPages > 1
2341 && !(uErr & X86_TRAP_PF_P)
2342 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2343 {
2344 /*
2345 * This code path is currently only taken when the caller is PGMTrap0eHandler
2346 * for non-present pages!
2347 *
2348 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2349 * deal with locality.
2350 */
2351 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2352 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2353 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2354 iPTDst = 0;
2355 else
2356 iPTDst -= PGM_SYNC_NR_PAGES / 2;
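        /* Same windowing as the guest-paging path above: populate up to PGM_SYNC_NR_PAGES
           not-yet-present shadow entries around the faulting page. */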
2357 for (; iPTDst < iPTDstEnd; iPTDst++)
2358 {
2359 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2360 {
2361 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2362 | (iPTDst << GUEST_PAGE_SHIFT));
2363
2364 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2365 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2366 GCPtrCurPage,
2367 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2368 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2369
2370 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2371 break;
2372 }
2373 else
2374 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n",
2375 (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << GUEST_PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2376 }
2377 }
2378 else
2379# endif /* PGM_SYNC_N_PAGES */
2380 {
2381 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2382 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2383 | (iPTDst << GUEST_PAGE_SHIFT));
2384
2385 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2386
2387 Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1}PteDst=%08llx%s\n",
2388 GCPtrPage,
2389 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2390 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2391 }
2392 return VINF_SUCCESS;
2393
2394# else
2395 NOREF(PdeSrc);
2396 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2397 return VERR_PGM_NOT_USED_IN_MODE;
2398# endif
2399}
2400
2401#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2402#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
2403
2404/**
2405 * Handle dirty bit tracking faults.
2406 *
2407 * @returns VBox status code.
2408 * @param pVCpu The cross context virtual CPU structure.
2409 * @param uErr Page fault error code.
2410 * @param pPdeSrc Guest page directory entry.
2411 * @param pPdeDst Shadow page directory entry.
2412 * @param GCPtrPage Guest context page address.
2413 */
2414static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
2415 RTGCPTR GCPtrPage)
2416{
2417 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2418 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2419 NOREF(uErr);
2420
2421 PGM_LOCK_ASSERT_OWNER(pVM);
2422
2423 /*
2424 * Handle big page.
2425 */
2426 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
2427 {
2428 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2429 {
2430 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2431 Assert(pPdeSrc->u & X86_PDE_RW);
2432
2433 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2434 * fault again and take this path to only invalidate the entry (see below). */
2435 SHWPDE PdeDst = *pPdeDst;
2436 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
2437 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
2438 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2439 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2440 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2441 }
2442
2443# ifdef IN_RING0
2444 /* Check for stale TLB entry; only applies to the SMP guest case. */
2445 if ( pVM->cCpus > 1
2446 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
2447 {
2448 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2449 if (pShwPage)
2450 {
2451 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2452 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2453 if (SHW_PTE_IS_P_RW(*pPteDst))
2454 {
2455 /* Stale TLB entry. */
2456 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2457 PGM_INVL_PG(pVCpu, GCPtrPage);
2458 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2459 }
2460 }
2461 }
2462# endif /* IN_RING0 */
2463 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2464 }
2465
2466 /*
2467 * Map the guest page table.
2468 */
2469 PGSTPT pPTSrc;
2470 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2471 AssertRCReturn(rc, rc);
2472
2473 if (SHW_PDE_IS_P(*pPdeDst))
2474 {
2475 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2476 const GSTPTE PteSrc = *pPteSrc;
2477
2478 /*
2479 * Map shadow page table.
2480 */
2481 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2482 if (pShwPage)
2483 {
2484 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2485 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2486 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2487 {
2488 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2489 {
2490 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
2491 SHWPTE PteDst = *pPteDst;
2492
2493 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2494 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2495
2496 Assert(PteSrc.u & X86_PTE_RW);
2497
2498 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2499 * entry will not harm; write access will simply fault again and
2500 * take this path to only invalidate the entry.
2501 */
2502 if (RT_LIKELY(pPage))
2503 {
2504 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2505 {
2506 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2507 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2508 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2509 SHW_PTE_SET_RO(PteDst);
2510 }
2511 else
2512 {
2513 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2514 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2515 {
2516 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
2517 AssertRC(rc);
2518 }
2519 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2520 SHW_PTE_SET_RW(PteDst);
2521 else
2522 {
2523 /* Still applies to shared pages. */
2524 Assert(!PGM_PAGE_IS_ZERO(pPage));
2525 SHW_PTE_SET_RO(PteDst);
2526 }
2527 }
2528 }
2529 else
2530 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2531
2532 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2533 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2534 PGM_INVL_PG(pVCpu, GCPtrPage);
2535 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2536 }
2537
2538# ifdef IN_RING0
2539 /* Check for stale TLB entry; only applies to the SMP guest case. */
2540 if ( pVM->cCpus > 1
2541 && SHW_PTE_IS_RW(*pPteDst)
2542 && SHW_PTE_IS_A(*pPteDst))
2543 {
2544 /* Stale TLB entry. */
2545 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2546 PGM_INVL_PG(pVCpu, GCPtrPage);
2547 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2548 }
2549# endif
2550 }
2551 }
2552 else
2553 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2554 }
2555
2556 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2557}
2558
2559#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
2560
2561/**
2562 * Sync a shadow page table.
2563 *
2564 * The shadow page table is not present in the shadow PDE.
2565 *
2566 * Handles mapping conflicts.
2567 *
2568 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2569 * conflict), and Trap0eHandler.
2570 *
2571 * A precondition for this method is that the shadow PDE is not present. The
2572 * caller must take the PGM lock before checking this and continue to hold it
2573 * when calling this method.
2574 *
2575 * @returns VBox status code.
2576 * @param pVCpu The cross context virtual CPU structure.
2577 * @param iPDSrc Page directory index.
2578 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2579 * Assume this is a temporary mapping.
2580 * @param GCPtrPage GC Pointer of the page that caused the fault
2581 */
2582static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2583{
2584 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2585 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2586
2587#if 0 /* rarely useful; leave for debugging. */
2588 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2589#endif
2590 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2591
2592 PGM_LOCK_ASSERT_OWNER(pVM);
2593
2594#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2595 || PGM_GST_TYPE == PGM_TYPE_PAE \
2596 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2597 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
2598 && PGM_SHW_TYPE != PGM_TYPE_NONE
2599 int rc = VINF_SUCCESS;
2600
2601 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2602
2603 /*
2604 * Some input validation first.
2605 */
2606 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2607
2608 /*
2609 * Get the relevant shadow PDE entry.
2610 */
2611# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2612 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2613 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2614
2615 /* Fetch the pgm pool shadow descriptor. */
2616 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2617 Assert(pShwPde);
2618
2619# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2620 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2621 PPGMPOOLPAGE pShwPde = NULL;
2622 PX86PDPAE pPDDst;
2623 PSHWPDE pPdeDst;
2624
2625 /* Fetch the pgm pool shadow descriptor. */
2626 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2627 AssertRCSuccessReturn(rc, rc);
2628 Assert(pShwPde);
2629
2630 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2631 pPdeDst = &pPDDst->a[iPDDst];
2632
2633# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2634 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2635 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2636 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2637 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2638 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2639 AssertRCSuccessReturn(rc, rc);
2640 Assert(pPDDst);
2641 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2642
2643# endif
2644 SHWPDE PdeDst = *pPdeDst;
2645
2646# if PGM_GST_TYPE == PGM_TYPE_AMD64
2647 /* Fetch the pgm pool shadow descriptor. */
2648 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2649 Assert(pShwPde);
2650# endif
2651
2652 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P.*/
2653
2654 /*
2655 * Sync the page directory entry.
2656 */
2657 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2658 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
2659 if ( (PdeSrc.u & X86_PDE_P)
2660 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2661 {
2662 /*
2663 * Allocate & map the page table.
2664 */
2665 PSHWPT pPTDst;
2666 PPGMPOOLPAGE pShwPage;
2667 RTGCPHYS GCPhys;
2668 if (fPageTable)
2669 {
2670 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2671# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2672 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2673 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2674# endif
2675 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
2676 pShwPde->idx, iPDDst, false /*fLockPage*/,
2677 &pShwPage);
2678 }
2679 else
2680 {
2681 PGMPOOLACCESS enmAccess;
2682# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2683 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
2684# else
2685 const bool fNoExecute = false;
2686# endif
2687
2688 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2689# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2690 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2691 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2692# endif
2693 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2694 if (PdeSrc.u & X86_PDE_US)
2695 {
2696 if (PdeSrc.u & X86_PDE_RW)
2697 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2698 else
2699 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2700 }
2701 else
2702 {
2703 if (PdeSrc.u & X86_PDE_RW)
2704 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2705 else
2706 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2707 }
2708 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
2709 pShwPde->idx, iPDDst, false /*fLockPage*/,
2710 &pShwPage);
2711 }
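        /* Three possible outcomes: a freshly allocated page table that must be filled below,
           a cached page table that only needs hooking up to the shadow PDE, or failure. */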
2712 if (rc == VINF_SUCCESS)
2713 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2714 else if (rc == VINF_PGM_CACHED_PAGE)
2715 {
2716 /*
2717 * The PT was cached, just hook it up.
2718 */
2719 if (fPageTable)
2720 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2721 else
2722 {
2723 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2724 /* (see explanation and assumptions further down.) */
2725 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2726 {
2727 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2728 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2729 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2730 }
2731 }
2732 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2733 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2734 return VINF_SUCCESS;
2735 }
2736 else
2737 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2738 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2739 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2740 * irrelevant at this point. */
2741 PdeDst.u &= X86_PDE_AVL_MASK;
2742 PdeDst.u |= pShwPage->Core.Key;
2743
2744 /*
2745 * Page directory has been accessed (this is a fault situation, remember).
2746 */
2747 /** @todo
2748 * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2749 * fault situation. What's more, the Trap0eHandler has already set the
2750 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2751 * might need setting the accessed flag.
2752 *
2753 * The best idea is to leave this change to the caller and add an
2754 * assertion that it's set already. */
2755 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
2756 if (fPageTable)
2757 {
2758 /*
2759 * Page table - 4KB.
2760 *
2761 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2762 */
2763 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2764 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
2765 PGSTPT pPTSrc;
2766 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2767 if (RT_SUCCESS(rc))
2768 {
2769 /*
2770 * Start by syncing the page directory entry so CSAM's TLB trick works.
2771 */
2772 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2773 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2774 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2775 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2776
2777 /*
2778 * Directory/page user or supervisor privilege: (same goes for read/write)
2779 *
2780 * Directory Page Combined
2781 * U/S U/S U/S
2782 * 0 0 0
2783 * 0 1 0
2784 * 1 0 0
2785 * 1 1 1
2786 *
2787 * Simple AND operation. Table listed for completeness.
2788 *
2789 */
2790 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
2791# ifdef PGM_SYNC_N_PAGES
2792 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2793 unsigned iPTDst = iPTBase;
2794 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2795 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2796 iPTDst = 0;
2797 else
2798 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2799# else /* !PGM_SYNC_N_PAGES */
2800 unsigned iPTDst = 0;
2801 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2802# endif /* !PGM_SYNC_N_PAGES */
2803 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2804 | ((RTGCPTR)iPTDst << GUEST_PAGE_SHIFT);
2805# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2806 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2807 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2808# else
2809 const unsigned offPTSrc = 0;
2810# endif
2811 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += GUEST_PAGE_SIZE)
2812 {
2813 const unsigned iPTSrc = iPTDst + offPTSrc;
2814 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2815 if (PteSrc.u & X86_PTE_P)
2816 {
2817 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2818 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2819 GCPtrCur,
2820 PteSrc.u & X86_PTE_P,
2821 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2822 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2823 (uint64_t)PteSrc.u,
2824 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2825 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2826 }
2827 /* else: the page table was cleared by the pool */
2828 } /* for PTEs */
2829 }
2830 }
2831 else
2832 {
2833 /*
2834 * Big page - 2/4MB.
2835 *
2836 * We'll walk the ram range list in parallel and optimize lookups.
2837 * We will only sync one shadow page table at a time.
2838 */
2839 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
2840
2841 /**
2842 * @todo It might be more efficient to sync only a part of the 4MB
2843 * page (similar to what we do for 4KB PDs).
2844 */
2845
2846 /*
2847 * Start by syncing the page directory entry.
2848 */
2849 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2850 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2851
2852 /*
2853 * If the page is not flagged as dirty and is writable, then make it read-only
2854 * at PD level, so we can set the dirty bit when the page is modified.
2855 *
2856 * ASSUMES that page access handlers are implemented on page table entry level.
2857 * Thus we will first catch the dirty access and set PDE.D and restart. If
2858 * there is an access handler, we'll trap again and let it work on the problem.
2859 */
2860 /** @todo move the above stuff to a section in the PGM documentation. */
2861 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
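/* Note: the test below reads "writable but not yet dirty", i.e.
 * (PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW, which is the only
 * case where write-protecting the shadow PDE buys us a dirty-bit fault. */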
2862 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2863 {
2864 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2865 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2866 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2867 }
2868 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2869 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2870
2871 /*
2872 * Fill the shadow page table.
2873 */
2874 /* Get address and flags from the source PDE. */
2875 SHWPTE PteDstBase;
2876 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2877
2878 /* Loop thru the entries in the shadow PT. */
2879 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2880 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2881 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
2882 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
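/* Note: each iteration of the loop below falls into one of three cases --
 * GCPhys lies inside the current RAM range (real pages are synced), in a gap
 * before the next range (the PTEs are zeroed), or beyond the last range (the
 * remaining PTEs are zeroed). */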
2883 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2884 unsigned iPTDst = 0;
2885 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2886 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2887 {
2888 if (pRam && GCPhys >= pRam->GCPhys)
2889 {
2890# ifndef PGM_WITH_A20
2891 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT;
2892# endif
2893 do
2894 {
2895 /* Make shadow PTE. */
2896# ifdef PGM_WITH_A20
2897 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
2898# else
2899 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2900# endif
2901 SHWPTE PteDst;
2902
2903# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2904 /* Try to make the page writable if necessary. */
2905 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2906 && ( PGM_PAGE_IS_ZERO(pPage)
2907 || ( SHW_PTE_IS_RW(PteDstBase)
2908 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2909# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2910 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2911# endif
2912# ifdef VBOX_WITH_PAGE_SHARING
2913 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2914# endif
2915 && !PGM_PAGE_IS_BALLOONED(pPage))
2916 )
2917 )
2918 {
2919 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2920 AssertRCReturn(rc, rc);
2921 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2922 break;
2923 }
2924# endif
2925
2926 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2927 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2928 else if (PGM_PAGE_IS_BALLOONED(pPage))
2929 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2930 else
2931 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2932
2933 /* Only map writable pages writable. */
2934 if ( SHW_PTE_IS_P_RW(PteDst)
2935 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2936 {
2937 /* Still applies to shared pages. */
2938 Assert(!PGM_PAGE_IS_ZERO(pPage));
2939 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2940 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2941 }
2942
2943 if (SHW_PTE_IS_P(PteDst))
2944 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2945
2946 /* commit it (not atomic, new table) */
2947 pPTDst->a[iPTDst] = PteDst;
2948 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2949 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2950 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2951
2952 /* advance */
2953 GCPhys += GUEST_PAGE_SIZE;
2954 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2955# ifndef PGM_WITH_A20
2956 iHCPage++;
2957# endif
2958 iPTDst++;
2959 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2960 && GCPhys <= pRam->GCPhysLast);
2961
2962 /* Advance ram range list. */
2963 while (pRam && GCPhys > pRam->GCPhysLast)
2964 pRam = pRam->CTX_SUFF(pNext);
2965 }
2966 else if (pRam)
2967 {
2968 Log(("Invalid pages at %RGp\n", GCPhys));
2969 do
2970 {
2971 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2972 GCPhys += GUEST_PAGE_SIZE;
2973 iPTDst++;
2974 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2975 && GCPhys < pRam->GCPhys);
2976 PGM_A20_APPLY_TO_VAR(pVCpu,GCPhys);
2977 }
2978 else
2979 {
2980 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2981 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2982 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2983 }
2984 } /* while more PTEs */
2985 } /* 4KB / 4MB */
2986 }
2987 else
2988 AssertRelease(!SHW_PDE_IS_P(PdeDst));
2989
2990 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2991 if (RT_FAILURE(rc))
2992 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
2993 return rc;
2994
2995#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2996 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2997 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2998 && PGM_SHW_TYPE != PGM_TYPE_NONE
2999 NOREF(iPDSrc); NOREF(pPDSrc);
3000
3001 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3002
3003 /*
3004 * Validate input a little bit.
3005 */
3006 int rc = VINF_SUCCESS;
3007# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3008 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3009 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3010
3011 /* Fetch the pgm pool shadow descriptor. */
3012 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3013 Assert(pShwPde);
3014
3015# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3016 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3017 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3018 PX86PDPAE pPDDst;
3019 PSHWPDE pPdeDst;
3020
3021 /* Fetch the pgm pool shadow descriptor. */
3022 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3023 AssertRCSuccessReturn(rc, rc);
3024 Assert(pShwPde);
3025
3026 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3027 pPdeDst = &pPDDst->a[iPDDst];
3028
3029# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3030 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3031 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3032 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3033 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3034 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3035 AssertRCSuccessReturn(rc, rc);
3036 Assert(pPDDst);
3037 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3038
3039 /* Fetch the pgm pool shadow descriptor. */
3040 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3041 Assert(pShwPde);
3042
3043# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3044 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3045 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3046 PEPTPD pPDDst;
3047 PEPTPDPT pPdptDst;
3048
3049 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3050 if (rc != VINF_SUCCESS)
3051 {
3052 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3053 AssertRC(rc);
3054 return rc;
3055 }
3056 Assert(pPDDst);
3057 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3058
3059 /* Fetch the pgm pool shadow descriptor. */
3060 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
3061 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3062 Assert(pShwPde);
3063# endif
3064 SHWPDE PdeDst = *pPdeDst;
3065
3066 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3067
3068# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3069 if (BTH_IS_NP_ACTIVE(pVM))
3070 {
3071 Assert(!VM_IS_NEM_ENABLED(pVM));
3072
3073 /* Check if we allocated a big page before for this 2 MB range. */
3074 PPGMPAGE pPage;
3075 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
3076 if (RT_SUCCESS(rc))
3077 {
3078 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3079 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3080 {
3081 if (PGM_A20_IS_ENABLED(pVCpu))
3082 {
3083 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3084 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3085 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3086 }
3087 else
3088 {
3089 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3090 pVM->pgm.s.cLargePagesDisabled++;
3091 }
3092 }
3093 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
3094 && PGM_A20_IS_ENABLED(pVCpu))
3095 {
3096 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3097 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
3098 if (RT_SUCCESS(rc))
3099 {
3100 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3101 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3102 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3103 }
3104 }
3105 else if ( PGMIsUsingLargePages(pVM)
3106 && PGM_A20_IS_ENABLED(pVCpu))
3107 {
3108 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3109 if (RT_SUCCESS(rc))
3110 {
3111 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3112 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3113 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3114 }
3115 else
3116 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3117 }
3118
3119 if (HCPhys != NIL_RTHCPHYS)
3120 {
3121# if PGM_SHW_TYPE == PGM_TYPE_EPT
3122 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_MEMTYPE_WB
3123 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
3124# else
3125 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
3126 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
3127# endif
3128 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3129
3130 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3131 /* Add a reference to the first page only. */
3132 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3133
3134 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3135 return VINF_SUCCESS;
3136 }
3137 }
3138 }
3139# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
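/* Summary of the block above: a 2 MB shadow PDE is installed and SyncPT
 * returns early only when one of three paths yields an HCPhys -- reusing an
 * existing PGM_PAGE_PDE_TYPE_PDE allocation, re-enabling a disabled one via
 * pgmPhysRecheckLargePage(), or allocating a fresh one via
 * pgmPhysAllocLargePage(); otherwise we fall through to the 4 KB path below. */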
3140
3141 /*
3142 * Allocate & map the page table.
3143 */
3144 PSHWPT pPTDst;
3145 PPGMPOOLPAGE pShwPage;
3146 RTGCPHYS GCPhys;
3147
3148 /* Virtual address = physical address */
3149 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
3150 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
3151 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
3152 &pShwPage);
3153 if ( rc == VINF_SUCCESS
3154 || rc == VINF_PGM_CACHED_PAGE)
3155 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3156 else
3157 {
3158 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3159 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3160 }
3161
3162 if (rc == VINF_SUCCESS)
3163 {
3164 /* New page table; fully set it up. */
3165 Assert(pPTDst);
3166
3167 /* Mask away the page offset. */
3168 GCPtrPage &= ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK;
3169
3170 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3171 {
3172 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
3173 | (iPTDst << GUEST_PAGE_SHIFT));
3174
3175 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
3176 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
3177 GCPtrCurPage,
3178 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
3179 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
3180
3181 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
3182 break;
3183 }
3184 }
3185 else
3186 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3187
3188 /* Save the new PDE. */
3189# if PGM_SHW_TYPE == PGM_TYPE_EPT
3190 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
3191 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
3192# else
3193 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
3194 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
3195# endif
3196 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3197
3198 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3199 if (RT_FAILURE(rc))
3200 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3201 return rc;
3202
3203#else
3204 NOREF(iPDSrc); NOREF(pPDSrc);
3205 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3206 return VERR_PGM_NOT_USED_IN_MODE;
3207#endif
3208}
3209
3210
3211
3212/**
3213 * Prefetch a page/set of pages.
3214 *
3215 * Typically used to sync commonly used pages before entering raw mode
3216 * after a CR3 reload.
3217 *
3218 * @returns VBox status code.
3219 * @param pVCpu The cross context virtual CPU structure.
3220 * @param GCPtrPage Page to prefetch.
3221 */
3222PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
3223{
3224#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3225 || PGM_GST_TYPE == PGM_TYPE_REAL \
3226 || PGM_GST_TYPE == PGM_TYPE_PROT \
3227 || PGM_GST_TYPE == PGM_TYPE_PAE \
3228 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3229 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3230 && PGM_SHW_TYPE != PGM_TYPE_NONE
3231 /*
3232 * Check that all Guest levels thru the PDE are present, getting the
3233 * PD and PDE in the process.
3234 */
3235 int rc = VINF_SUCCESS;
3236# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3237# if PGM_GST_TYPE == PGM_TYPE_32BIT
3238 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3239 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3240# elif PGM_GST_TYPE == PGM_TYPE_PAE
3241 unsigned iPDSrc;
3242 X86PDPE PdpeSrc;
3243 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3244 if (!pPDSrc)
3245 return VINF_SUCCESS; /* not present */
3246# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3247 unsigned iPDSrc;
3248 PX86PML4E pPml4eSrc;
3249 X86PDPE PdpeSrc;
3250 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3251 if (!pPDSrc)
3252 return VINF_SUCCESS; /* not present */
3253# endif
3254 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3255# else
3256 PGSTPD pPDSrc = NULL;
3257 const unsigned iPDSrc = 0;
3258 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3259# endif
3260
3261 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
3262 {
3263 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3264 PGM_LOCK_VOID(pVM);
3265
3266# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3267 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3268# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3269 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3270 PX86PDPAE pPDDst;
3271 X86PDEPAE PdeDst;
3272# if PGM_GST_TYPE != PGM_TYPE_PAE
3273 X86PDPE PdpeSrc;
3274
3275 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3276 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3277# endif
3278 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3279 if (rc != VINF_SUCCESS)
3280 {
3281 PGM_UNLOCK(pVM);
3282 AssertRC(rc);
3283 return rc;
3284 }
3285 Assert(pPDDst);
3286 PdeDst = pPDDst->a[iPDDst];
3287
3288# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3289 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3290 PX86PDPAE pPDDst;
3291 X86PDEPAE PdeDst;
3292
3293# if PGM_GST_TYPE == PGM_TYPE_PROT
3294 /* AMD-V nested paging */
3295 X86PML4E Pml4eSrc;
3296 X86PDPE PdpeSrc;
3297 PX86PML4E pPml4eSrc = &Pml4eSrc;
3298
3299 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3300 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3301 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3302# endif
3303
3304 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3305 if (rc != VINF_SUCCESS)
3306 {
3307 PGM_UNLOCK(pVM);
3308 AssertRC(rc);
3309 return rc;
3310 }
3311 Assert(pPDDst);
3312 PdeDst = pPDDst->a[iPDDst];
3313# endif
3314 if (!(PdeDst.u & X86_PDE_P))
3315 {
3316 /** @todo r=bird: This guy will set the A bit on the PDE,
3317 * probably harmless. */
3318 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3319 }
3320 else
3321 {
3322 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3323 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3324 * makes no sense to prefetch more than one page.
3325 */
3326 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3327 if (RT_SUCCESS(rc))
3328 rc = VINF_SUCCESS;
3329 }
3330 PGM_UNLOCK(pVM);
3331 }
3332 return rc;
3333
3334#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3335 NOREF(pVCpu); NOREF(GCPtrPage);
3336 return VINF_SUCCESS; /* ignore */
3337#else
3338 AssertCompile(0);
3339#endif
3340}
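/* Usage sketch (hypothetical caller, not taken from this source tree): code
 * that expects to touch a run of pages soon might do
 *
 *     for (RTGCPTR GCPtr = GCPtrFirst; GCPtr < GCPtrLast; GCPtr += GUEST_PAGE_SIZE)
 *     {
 *         int rc2 = PGM_BTH_NAME(PrefetchPage)(pVCpu, GCPtr);
 *         AssertRC(rc2);
 *     }
 *
 * where GCPtrFirst/GCPtrLast are made-up names; as the SyncPage branch above
 * shows, informational success statuses are folded into VINF_SUCCESS. */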
3341
3342
3343
3344
3345/**
3346 * Syncs a page during a PGMVerifyAccess() call.
3347 *
3348 * @returns VBox status code (informational included).
3349 * @param pVCpu The cross context virtual CPU structure.
3350 * @param GCPtrPage The address of the page to sync.
3351 * @param fPage The effective guest page flags.
3352 * @param uErr The trap error code.
3353 * @remarks This will normally never be called on invalid guest page
3354 * translation entries.
3355 */
3356PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3357{
3358 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3359
3360 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3361 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
3362
3363 Assert(!pVM->pgm.s.fNestedPaging);
3364#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3365 || PGM_GST_TYPE == PGM_TYPE_REAL \
3366 || PGM_GST_TYPE == PGM_TYPE_PROT \
3367 || PGM_GST_TYPE == PGM_TYPE_PAE \
3368 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3369 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3370 && PGM_SHW_TYPE != PGM_TYPE_NONE
3371
3372 /*
3373 * Get guest PD and index.
3374 */
3375 /** @todo Performance: We've done all this a jiffy ago in the
3376 * PGMGstGetPage call. */
3377# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3378# if PGM_GST_TYPE == PGM_TYPE_32BIT
3379 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3380 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3381
3382# elif PGM_GST_TYPE == PGM_TYPE_PAE
3383 unsigned iPDSrc = 0;
3384 X86PDPE PdpeSrc;
3385 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3386 if (RT_UNLIKELY(!pPDSrc))
3387 {
3388 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3389 return VINF_EM_RAW_GUEST_TRAP;
3390 }
3391
3392# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3393 unsigned iPDSrc = 0; /* shut up gcc */
3394 PX86PML4E pPml4eSrc = NULL; /* ditto */
3395 X86PDPE PdpeSrc;
3396 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3397 if (RT_UNLIKELY(!pPDSrc))
3398 {
3399 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3400 return VINF_EM_RAW_GUEST_TRAP;
3401 }
3402# endif
3403
3404# else /* !PGM_WITH_PAGING */
3405 PGSTPD pPDSrc = NULL;
3406 const unsigned iPDSrc = 0;
3407# endif /* !PGM_WITH_PAGING */
3408 int rc = VINF_SUCCESS;
3409
3410 PGM_LOCK_VOID(pVM);
3411
3412 /*
3413 * First check if the shadow PD is present.
3414 */
3415# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3416 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3417
3418# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3419 PX86PDEPAE pPdeDst;
3420 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3421 PX86PDPAE pPDDst;
3422# if PGM_GST_TYPE != PGM_TYPE_PAE
3423 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3424 X86PDPE PdpeSrc;
3425 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3426# endif
3427 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3428 if (rc != VINF_SUCCESS)
3429 {
3430 PGM_UNLOCK(pVM);
3431 AssertRC(rc);
3432 return rc;
3433 }
3434 Assert(pPDDst);
3435 pPdeDst = &pPDDst->a[iPDDst];
3436
3437# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3438 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3439 PX86PDPAE pPDDst;
3440 PX86PDEPAE pPdeDst;
3441
3442# if PGM_GST_TYPE == PGM_TYPE_PROT
3443 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3444 X86PML4E Pml4eSrc;
3445 X86PDPE PdpeSrc;
3446 PX86PML4E pPml4eSrc = &Pml4eSrc;
3447 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3448 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3449# endif
3450
3451 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3452 if (rc != VINF_SUCCESS)
3453 {
3454 PGM_UNLOCK(pVM);
3455 AssertRC(rc);
3456 return rc;
3457 }
3458 Assert(pPDDst);
3459 pPdeDst = &pPDDst->a[iPDDst];
3460# endif
3461
3462 if (!(pPdeDst->u & X86_PDE_P))
3463 {
3464 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3465 if (rc != VINF_SUCCESS)
3466 {
3467 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3468 PGM_UNLOCK(pVM);
3469 AssertRC(rc);
3470 return rc;
3471 }
3472 }
3473
3474# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3475 /* Check for dirty bit fault */
3476 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3477 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3478 Log(("PGMVerifyAccess: success (dirty)\n"));
3479 else
3480# endif
3481 {
3482# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3483 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3484# else
3485 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3486# endif
3487
3488 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3489 if (uErr & X86_TRAP_PF_US)
3490 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
3491 else /* supervisor */
3492 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3493
3494 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3495 if (RT_SUCCESS(rc))
3496 {
3497 /* Page was successfully synced */
3498 Log2(("PGMVerifyAccess: success (sync)\n"));
3499 rc = VINF_SUCCESS;
3500 }
3501 else
3502 {
3503 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3504 rc = VINF_EM_RAW_GUEST_TRAP;
3505 }
3506 }
3507 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3508 PGM_UNLOCK(pVM);
3509 return rc;
3510
3511#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3512
3513 AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3514 return VERR_PGM_NOT_USED_IN_MODE;
3515#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3516}
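/* Note on the return contract above: success statuses (VINF_SUCCESS, or
 * VINF_PGM_HANDLED_DIRTY_BIT_FAULT when the dirty-bit path handled the fault)
 * mean the page is accessible after the resync, while VINF_EM_RAW_GUEST_TRAP
 * tells the PGMVerifyAccess() caller to reflect a fault to the guest. */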
3517
3518
3519/**
3520 * Syncs the paging hierarchy starting at CR3.
3521 *
3522 * @returns VBox status code, R0/RC may return VINF_PGM_SYNC_CR3, no other
3523 * informational status codes.
3524 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
3525 * the VMM into guest context.
3526 * @param pVCpu The cross context virtual CPU structure.
3527 * @param cr0 Guest context CR0 register.
3528 * @param cr3 Guest context CR3 register. Not subjected to the A20
3529 * mask.
3530 * @param cr4 Guest context CR4 register.
3531 * @param fGlobal Including global page directories or not
3532 */
3533PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3534{
3535 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3536 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
3537
3538 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
3539
3540#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3541# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3542 PGM_LOCK_VOID(pVM);
3543 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3544 if (pPool->cDirtyPages)
3545 pgmPoolResetDirtyPages(pVM);
3546 PGM_UNLOCK(pVM);
3547# endif
3548#endif /* !NESTED && !EPT */
3549
3550#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3551 /*
3552 * Nested / EPT / None - No work.
3553 */
3554 return VINF_SUCCESS;
3555
3556#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3557 /*
3558 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3559 * out the shadow parts when the guest modifies its tables.
3560 */
3561 return VINF_SUCCESS;
3562
3563#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3564
3565 return VINF_SUCCESS;
3566#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3567}
3568
3569
3570
3571
3572#ifdef VBOX_STRICT
3573
3574/**
3575 * Checks that the shadow page table is in sync with the guest one.
3576 *
3577 * @returns The number of errors.
3578 * @param pVCpu The cross context virtual CPU structure.
3579 * @param cr3 Guest context CR3 register.
3580 * @param cr4 Guest context CR4 register.
3581 * @param GCPtr Where to start. Defaults to 0.
3582 * @param cb How much to check. Defaults to everything.
3583 */
3584PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3585{
3586 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
3587#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3588 return 0;
3589#else
3590 unsigned cErrors = 0;
3591 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3592 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3593
3594# if PGM_GST_TYPE == PGM_TYPE_PAE
3595 /** @todo currently broken; crashes below somewhere */
3596 AssertFailed();
3597# endif
3598
3599# if PGM_GST_TYPE == PGM_TYPE_32BIT \
3600 || PGM_GST_TYPE == PGM_TYPE_PAE \
3601 || PGM_GST_TYPE == PGM_TYPE_AMD64
3602
3603 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3604 PPGMCPU pPGM = &pVCpu->pgm.s;
3605 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3606 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3607# ifndef IN_RING0
3608 RTHCPHYS HCPhys; /* general usage. */
3609# endif
3610 int rc;
3611
3612 /*
3613 * Check that the Guest CR3 and all its mappings are correct.
3614 */
3615 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
3616 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3617 false);
3618# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3619# if 0
3620# if PGM_GST_TYPE == PGM_TYPE_32BIT
3621 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3622# else
3623 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3624# endif
3625 AssertRCReturn(rc, 1);
3626 HCPhys = NIL_RTHCPHYS;
3627 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
3628 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3629# endif
3630# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3631 pgmGstGet32bitPDPtr(pVCpu);
3632 RTGCPHYS GCPhys;
3633 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
3634 AssertRCReturn(rc, 1);
3635 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3636# endif
3637# endif /* !IN_RING0 */
3638
3639 /*
3640 * Get and check the Shadow CR3.
3641 */
3642# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3643 unsigned cPDEs = X86_PG_ENTRIES;
3644 unsigned cIncrement = X86_PG_ENTRIES * GUEST_PAGE_SIZE;
3645# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3646# if PGM_GST_TYPE == PGM_TYPE_32BIT
3647 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3648# else
3649 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3650# endif
3651 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3652# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3653 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3654 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3655# endif
3656 if (cb != ~(RTGCPTR)0)
3657 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3658
3659/** @todo call the other two PGMAssert*() functions. */
3660
3661# if PGM_GST_TYPE == PGM_TYPE_AMD64
3662 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3663
3664 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3665 {
3666 PPGMPOOLPAGE pShwPdpt = NULL;
3667 PX86PML4E pPml4eSrc;
3668 PX86PML4E pPml4eDst;
3669 RTGCPHYS GCPhysPdptSrc;
3670
3671 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3672 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3673
3674 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3675 if (!(pPml4eDst->u & X86_PML4E_P))
3676 {
3677 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3678 continue;
3679 }
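/* Note: _2M * 512 * 512 is 512 GiB, the address space covered by a single
 * PML4 entry; the PDPTE loop further down likewise skips 512 * _2M = 1 GiB
 * per entry. */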
3680
3681 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3682 GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
3683
3684 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
3685 {
3686 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3687 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3688 cErrors++;
3689 continue;
3690 }
3691
3692 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3693 {
3694 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3695 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3696 cErrors++;
3697 continue;
3698 }
3699
3700 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
3701 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
3702 {
3703 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3704 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3705 cErrors++;
3706 continue;
3707 }
3708# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3709 {
3710# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3711
3712# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3713 /*
3714 * Check the PDPTEs too.
3715 */
3716 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3717
3718 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3719 {
3720 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3721 PPGMPOOLPAGE pShwPde = NULL;
3722 PX86PDPE pPdpeDst;
3723 RTGCPHYS GCPhysPdeSrc;
3724 X86PDPE PdpeSrc;
3725 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3726# if PGM_GST_TYPE == PGM_TYPE_PAE
3727 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3728 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3729# else
3730 PX86PML4E pPml4eSrcIgn;
3731 PX86PDPT pPdptDst;
3732 PX86PDPAE pPDDst;
3733 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3734
3735 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3736 if (rc != VINF_SUCCESS)
3737 {
3738 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3739 GCPtr += 512 * _2M;
3740 continue; /* next PDPTE */
3741 }
3742 Assert(pPDDst);
3743# endif
3744 Assert(iPDSrc == 0);
3745
3746 pPdpeDst = &pPdptDst->a[iPdpt];
3747
3748 if (!(pPdpeDst->u & X86_PDPE_P))
3749 {
3750 GCPtr += 512 * _2M;
3751 continue; /* next PDPTE */
3752 }
3753
3754 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3755 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
3756
3757 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
3758 {
3759 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3760 GCPtr += 512 * _2M;
3761 cErrors++;
3762 continue;
3763 }
3764
3765 if (GCPhysPdeSrc != pShwPde->GCPhys)
3766 {
3767# if PGM_GST_TYPE == PGM_TYPE_AMD64
3768 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3769# else
3770 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3771# endif
3772 GCPtr += 512 * _2M;
3773 cErrors++;
3774 continue;
3775 }
3776
3777# if PGM_GST_TYPE == PGM_TYPE_AMD64
3778 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
3779 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
3780 {
3781 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3782 GCPtr += 512 * _2M;
3783 cErrors++;
3784 continue;
3785 }
3786# endif
3787
3788# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3789 {
3790# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3791# if PGM_GST_TYPE == PGM_TYPE_32BIT
3792 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3793# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3794 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3795# endif
3796# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3797 /*
3798 * Iterate the shadow page directory.
3799 */
3800 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3801 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3802
3803 for (;
3804 iPDDst < cPDEs;
3805 iPDDst++, GCPtr += cIncrement)
3806 {
3807# if PGM_SHW_TYPE == PGM_TYPE_PAE
3808 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3809# else
3810 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3811# endif
3812 if ( (PdeDst.u & X86_PDE_P)
3813 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
3814 {
3815 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3816 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3817 if (!pPoolPage)
3818 {
3819 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3820 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3821 cErrors++;
3822 continue;
3823 }
3824 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3825
3826 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3827 {
3828 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3829 GCPtr, (uint64_t)PdeDst.u));
3830 cErrors++;
3831 }
3832
3833 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3834 {
3835 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3836 GCPtr, (uint64_t)PdeDst.u));
3837 cErrors++;
3838 }
3839
3840 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3841 if (!(PdeSrc.u & X86_PDE_P))
3842 {
3843 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3844 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3845 cErrors++;
3846 continue;
3847 }
3848
3849 if ( !(PdeSrc.u & X86_PDE_PS)
3850 || !fBigPagesSupported)
3851 {
3852 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3853# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3854 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
3855# endif
3856 }
3857 else
3858 {
3859# if PGM_GST_TYPE == PGM_TYPE_32BIT
3860 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3861 {
3862 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3863 GCPtr, (uint64_t)PdeSrc.u));
3864 cErrors++;
3865 continue;
3866 }
3867# endif
3868 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3869# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3870 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
3871# endif
3872 }
3873
3874 if ( pPoolPage->enmKind
3875 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3876 {
3877 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3878 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3879 cErrors++;
3880 }
3881
3882 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3883 if (!pPhysPage)
3884 {
3885 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3886 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3887 cErrors++;
3888 continue;
3889 }
3890
3891 if (GCPhysGst != pPoolPage->GCPhys)
3892 {
3893 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3894 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3895 cErrors++;
3896 continue;
3897 }
3898
3899 if ( !(PdeSrc.u & X86_PDE_PS)
3900 || !fBigPagesSupported)
3901 {
3902 /*
3903 * Page Table.
3904 */
3905 const GSTPT *pPTSrc;
3906 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(GUEST_PAGE_SIZE - 1)),
3907 &pPTSrc);
3908 if (RT_FAILURE(rc))
3909 {
3910 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3911 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3912 cErrors++;
3913 continue;
3914 }
3915 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3916 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3917 {
3918 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3919 // (This problem will go away when/if we shadow multiple CR3s.)
3920 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3921 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3922 cErrors++;
3923 continue;
3924 }
3925 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3926 {
3927 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3928 GCPtr, (uint64_t)PdeDst.u));
3929 cErrors++;
3930 continue;
3931 }
3932
3933 /* iterate the page table. */
3934# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3935 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3936 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3937# else
3938 const unsigned offPTSrc = 0;
3939# endif
3940 for (unsigned iPT = 0, off = 0;
3941 iPT < RT_ELEMENTS(pPTDst->a);
3942 iPT++, off += GUEST_PAGE_SIZE)
3943 {
3944 const SHWPTE PteDst = pPTDst->a[iPT];
3945
3946 /* skip not-present and dirty tracked entries. */
3947 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3948 continue;
3949 Assert(SHW_PTE_IS_P(PteDst));
3950
3951 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3952 if (!(PteSrc.u & X86_PTE_P))
3953 {
3954# ifdef IN_RING3
3955 PGMAssertHandlerAndFlagsInSync(pVM);
3956 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
3957 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
3958 0, 0, UINT64_MAX, 99, NULL);
3959# endif
3960 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3961 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3962 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
3963 cErrors++;
3964 continue;
3965 }
3966
3967 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3968# if 1 /** @todo sync accessed bit properly... */
3969 fIgnoreFlags |= X86_PTE_A;
3970# endif
3971
3972 /* match the physical addresses */
3973 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3974 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
3975
3976# ifdef IN_RING3
3977 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3978 if (RT_FAILURE(rc))
3979 {
3980# if 0
3981 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3982 {
3983 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3984 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3985 cErrors++;
3986 continue;
3987 }
3988# endif
3989 }
3990 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3991 {
3992 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3993 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3994 cErrors++;
3995 continue;
3996 }
3997# endif
3998
3999 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4000 if (!pPhysPage)
4001 {
4002# if 0
4003 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4004 {
4005 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4006 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4007 cErrors++;
4008 continue;
4009 }
4010# endif
4011 if (SHW_PTE_IS_RW(PteDst))
4012 {
4013 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4014 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4015 cErrors++;
4016 }
4017 fIgnoreFlags |= X86_PTE_RW;
4018 }
4019 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4020 {
4021 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4022 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4023 cErrors++;
4024 continue;
4025 }
4026
4027 /* flags */
4028 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4029 {
4030 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4031 {
4032 if (SHW_PTE_IS_RW(PteDst))
4033 {
4034 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4035 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4036 cErrors++;
4037 continue;
4038 }
4039 fIgnoreFlags |= X86_PTE_RW;
4040 }
4041 else
4042 {
4043 if ( SHW_PTE_IS_P(PteDst)
4044# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4045 && !PGM_PAGE_IS_MMIO(pPhysPage)
4046# endif
4047 )
4048 {
4049 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4050 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4051 cErrors++;
4052 continue;
4053 }
4054 fIgnoreFlags |= X86_PTE_P;
4055 }
4056 }
4057 else
4058 {
4059 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
4060 {
4061 if (SHW_PTE_IS_RW(PteDst))
4062 {
4063 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4064 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4065 cErrors++;
4066 continue;
4067 }
4068 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
4069 {
4070 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4071 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4072 cErrors++;
4073 continue;
4074 }
4075 if (SHW_PTE_IS_D(PteDst))
4076 {
4077 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4078 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4079 cErrors++;
4080 }
4081# if 0 /** @todo sync access bit properly... */
4082 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4083 {
4084 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4085 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4086 cErrors++;
4087 }
4088 fIgnoreFlags |= X86_PTE_RW;
4089# else
4090 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4091# endif
4092 }
4093 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4094 {
4095 /* access bit emulation (not implemented). */
4096 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
4097 {
4098 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4099 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4100 cErrors++;
4101 continue;
4102 }
4103 if (!SHW_PTE_IS_A(PteDst))
4104 {
4105 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4106 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4107 cErrors++;
4108 }
4109 fIgnoreFlags |= X86_PTE_P;
4110 }
4111# ifdef DEBUG_sandervl
4112 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4113# endif
4114 }
4115
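/* Note: the second comparison below additionally masks X86_PTE_RW out of the
 * guest PTE because the shadow PTE may legitimately be read-only while the
 * guest PTE is writable (dirty-bit tracking, write-monitored pages), so only
 * the remaining flag bits are required to match. */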
4116 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4117 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4118 )
4119 {
4120 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4121 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4122 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4123 cErrors++;
4124 continue;
4125 }
4126 } /* foreach PTE */
4127 }
4128 else
4129 {
4130 /*
4131 * Big Page.
4132 */
4133 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4134 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
4135 {
4136 if (PdeDst.u & X86_PDE_RW)
4137 {
4138 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4139 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4140 cErrors++;
4141 continue;
4142 }
4143 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4144 {
4145 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4146 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4147 cErrors++;
4148 continue;
4149 }
4150# if 0 /** @todo sync access bit properly... */
4151 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4152 {
4153 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4154 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4155 cErrors++;
4156 }
4157 fIgnoreFlags |= X86_PTE_RW;
4158# else
4159 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4160# endif
4161 }
4162 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4163 {
4164 /* access bit emulation (not implemented). */
4165 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
4166 {
4167 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4168 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4169 cErrors++;
4170 continue;
4171 }
4172 if (!SHW_PDE_IS_A(PdeDst))
4173 {
4174 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4175 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4176 cErrors++;
4177 }
4178 fIgnoreFlags |= X86_PTE_P;
4179 }
4180
4181 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4182 {
4183 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4184 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4185 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4186 cErrors++;
4187 }
4188
4189 /* iterate the page table. */
4190 for (unsigned iPT = 0, off = 0;
4191 iPT < RT_ELEMENTS(pPTDst->a);
4192 iPT++, off += GUEST_PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + GUEST_PAGE_SIZE))
4193 {
4194 const SHWPTE PteDst = pPTDst->a[iPT];
4195
4196 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4197 {
4198 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4199 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4200 cErrors++;
4201 }
4202
4203 /* skip not-present entries. */
4204 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4205 continue;
4206
4207 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4208
4209 /* match the physical addresses */
4210 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4211
4212# ifdef IN_RING3
4213 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4214 if (RT_FAILURE(rc))
4215 {
4216# if 0
4217 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4218 {
4219 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4220 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4221 cErrors++;
4222 }
4223# endif
4224 }
4225 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4226 {
4227 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4228 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4229 cErrors++;
4230 continue;
4231 }
4232# endif
4233 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4234 if (!pPhysPage)
4235 {
4236# if 0 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4237 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4238 {
4239 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4240 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4241 cErrors++;
4242 continue;
4243 }
4244# endif
4245 if (SHW_PTE_IS_RW(PteDst))
4246 {
4247 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4248 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4249 cErrors++;
4250 }
4251 fIgnoreFlags |= X86_PTE_RW;
4252 }
4253 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4254 {
4255 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4256 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4257 cErrors++;
4258 continue;
4259 }
4260
4261 /* flags */
4262 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4263 {
4264 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4265 {
4266 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4267 {
4268 if (SHW_PTE_IS_RW(PteDst))
4269 {
4270 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4271 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4272 cErrors++;
4273 continue;
4274 }
4275 fIgnoreFlags |= X86_PTE_RW;
4276 }
4277 }
4278 else
4279 {
4280 if ( SHW_PTE_IS_P(PteDst)
4281# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4282 && !PGM_PAGE_IS_MMIO(pPhysPage)
4283# endif
4284 )
4285 {
4286 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4287 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4288 cErrors++;
4289 continue;
4290 }
4291 fIgnoreFlags |= X86_PTE_P;
4292 }
4293 }
4294
4295 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4296 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4297 )
4298 {
4299 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4300 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4301 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4302 cErrors++;
4303 continue;
4304 }
4305 } /* for each PTE */
4306 }
4307 }
4308 /* not present */
4309
4310 } /* for each PDE */
4311
4312 } /* for each PDPTE */
4313
4314 } /* for each PML4E */
4315
4316# ifdef DEBUG
4317 if (cErrors)
4318 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4319# endif
4320# endif /* GST is in {32BIT, PAE, AMD64} */
4321 return cErrors;
4322#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
4323}
4324#endif /* VBOX_STRICT */
4325
4326
4327/**
4328 * Sets up the CR3 for shadow paging
4329 *
4330 * @returns Strict VBox status code.
4331 * @retval VINF_SUCCESS.
4332 *
4333 * @param pVCpu The cross context virtual CPU structure.
4334 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
4335 * already applied.)
4336 */
4337PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
4338{
4339 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4340 int rc = VINF_SUCCESS;
4341
4342 /* Update guest paging info. */
4343#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4344 || PGM_GST_TYPE == PGM_TYPE_PAE \
4345 || PGM_GST_TYPE == PGM_TYPE_AMD64
4346
4347 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4348 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4349
4350# if PGM_GST_TYPE == PGM_TYPE_PAE
4351 if ( !pVCpu->pgm.s.CTX_SUFF(fPaePdpesAndCr3Mapped)
4352 || pVCpu->pgm.s.GCPhysPaeCR3 != GCPhysCR3)
4353# endif
4354 {
4355 /*
4356 * Map the page CR3 points at.
4357 */
4358 RTHCPTR HCPtrGuestCR3;
4359 rc = pgmGstMapCr3(pVCpu, GCPhysCR3, &HCPtrGuestCR3);
4360 if (RT_SUCCESS(rc))
4361 {
4362# if PGM_GST_TYPE == PGM_TYPE_32BIT
4363# ifdef IN_RING3
4364 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
4365 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
4366# else
4367 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
4368 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
4369# endif
4370
4371# elif PGM_GST_TYPE == PGM_TYPE_PAE
4372# ifdef IN_RING3
4373 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
4374 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
4375# else
4376 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
4377 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
4378# endif
4379
4380 /*
4381 * Update CPUM and map the 4 PDs too.
4382 */
4383 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
4384 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
4385 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
4386 PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
4387
4388 pVCpu->pgm.s.GCPhysPaeCR3 = GCPhysCR3;
4389# ifdef IN_RING3
4390 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = true;
4391 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4392# else
4393 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4394 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = true;
4395# endif
4396
4397# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4398# ifdef IN_RING3
4399 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
4400 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
4401# else
4402 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
4403 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
4404# endif
4405# endif
4406 }
4407 else
4408 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4409 }
4410#endif
4411
4412 /*
4413 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
4414 */
4415# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4416 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4417 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4418 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4419 && PGM_GST_TYPE != PGM_TYPE_PROT))
4420
4421 Assert(!pVM->pgm.s.fNestedPaging);
4422 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4423
4424 /*
4425 * Update the shadow root page as well since that's not fixed.
4426 */
4427 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4428 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4429 PPGMPOOLPAGE pNewShwPageCR3;
4430
4431 PGM_LOCK_VOID(pVM);
4432
4433# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4434 if (pPool->cDirtyPages)
4435 pgmPoolResetDirtyPages(pVM);
4436# endif
4437
4438 Assert(!(GCPhysCR3 >> (GUEST_PAGE_SHIFT + 32))); /** @todo what is this for? */
4439 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
4440 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
4441 AssertFatalRC(rc2);
4442
4443 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
4444 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
4445
4446 /* Set the current hypervisor CR3. */
4447 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4448
4449 /* Clean up the old CR3 root. */
4450 if ( pOldShwPageCR3
4451 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4452 {
4453 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4454
4455 /* Mark the page as unlocked; allow flushing again. */
4456 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4457
4458 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
4459 }
4460 PGM_UNLOCK(pVM);
4461# else
4462 NOREF(GCPhysCR3);
4463# endif
4464
4465 return rc;
4466}
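/* Note on the ordering above: the new shadow root is allocated and made the
 * active hypervisor CR3 before the old root page is unlocked and freed, so
 * there is never a point without a valid shadow root installed. */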
4467
4468/**
4469 * Unmaps the shadow CR3.
4470 *
4471 * @returns VBox status, no specials.
4472 * @param pVCpu The cross context virtual CPU structure.
4473 */
4474PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
4475{
4476 LogFlow(("UnmapCR3\n"));
4477
4478 int rc = VINF_SUCCESS;
4479 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4480
4481 /*
4482 * Update guest paging info.
4483 */
4484#if PGM_GST_TYPE == PGM_TYPE_32BIT
4485 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4486 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4487
4488#elif PGM_GST_TYPE == PGM_TYPE_PAE
4489 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4490 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4491 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4492 {
4493 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4494 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4495 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4496 }
4497
4498#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4499 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4500 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4501
4502#else /* prot/real mode stub */
4503 /* nothing to do */
4504#endif
4505
4506 /*
4507 * Update second-level address translation info.
4508 */
4509#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4510 pVCpu->pgm.s.pGstEptPml4R3 = 0;
4511 pVCpu->pgm.s.pGstEptPml4R0 = 0;
4512#endif
4513
4514 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4515 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4516 pVCpu->pgm.s.GCPhysPaeCR3 = NIL_RTGCPHYS;
4517
4518 /*
4519 * Update shadow paging info.
4520 */
4521#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4522 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4523 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4524# if PGM_GST_TYPE != PGM_TYPE_REAL
4525 Assert(!pVM->pgm.s.fNestedPaging);
4526# endif
4527 PGM_LOCK_VOID(pVM);
4528
4529 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4530 {
4531 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4532
4533# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4534 if (pPool->cDirtyPages)
4535 pgmPoolResetDirtyPages(pVM);
4536# endif
4537
4538 /* Mark the page as unlocked; allow flushing again. */
4539 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4540
4541 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
4542 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4543 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4544 }
4545
4546 PGM_UNLOCK(pVM);
4547#endif
4548
4549 return rc;
4550}
4551