VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 91939

Last change on this file since 91939 was 91854, checked in by vboxsync, 4 years ago

VMM: Removed PGM_WITHOUT_MAPPINGS and associated mapping code. bugref:9517

1/* $Id: PGMAllBth.h 91854 2021-10-20 00:50:11Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
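/*
 * How this template is used (simplified sketch, not part of the original file
 * header): this header is never compiled on its own.  PGMAll.cpp, together
 * with the per-mode definition headers, defines PGM_GST_TYPE, PGM_SHW_TYPE and
 * the name-mangling macro PGM_BTH_NAME, then includes this file once per
 * guest/shadow paging mode pair, roughly like:
 *
 *     #define PGM_GST_TYPE        PGM_TYPE_32BIT
 *     #define PGM_SHW_TYPE        PGM_TYPE_PAE
 *     #define PGM_BTH_NAME(name)  PGM_BTH_NAME_PAE_32BIT(name)
 *     #include "PGMAllBth.h"
 *
 * (The exact define sequence lives in PGMAll.cpp.)  Each inclusion emits a
 * full set of "both" (guest+shadow) functions specialized for that mode pair,
 * which is why the file is "one big #ifdef-orgy".
 */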
12
13/*
14 * Copyright (C) 2006-2020 Oracle Corporation
15 *
16 * This file is part of VirtualBox Open Source Edition (OSE), as
17 * available from http://www.virtualbox.org. This file is free software;
18 * you can redistribute it and/or modify it under the terms of the GNU
19 * General Public License (GPL) as published by the Free Software
20 * Foundation, in version 2 as it comes in the "COPYING" file of the
21 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
22 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
23 */
24
25#ifdef _MSC_VER
26/** @todo we're generating unnecessary code in nested/ept shadow mode and for
27 * real/prot-guest+RC mode. */
28# pragma warning(disable: 4505)
29#endif
30
31
32/*********************************************************************************************************************************
33* Internal Functions *
34*********************************************************************************************************************************/
35RT_C_DECLS_BEGIN
36PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
37#ifndef IN_RING3
38PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
39#endif
40PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
41static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
42static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
43static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
44#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
45static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
46#else
47static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
48#endif
49PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
50PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
51PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
52#ifdef VBOX_STRICT
53PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
54#endif
55PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3, bool fPdpesMapped);
56PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
57
58#ifdef IN_RING3
59PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
60#endif
61RT_C_DECLS_END
62
63
64
65
66/*
67 * Filter out some illegal combinations of guest and shadow paging, so we can
68 * remove redundant checks inside functions.
69 */
70#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
71 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
72# error "Invalid combination; PAE guest implies PAE shadow"
73#endif
74
75#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
76 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
77 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
78# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
79#endif
80
81#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
82 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
83 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
84# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
85#endif
86
87#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
88 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
89# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
90#endif
91
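/*
 * Summary of what the filters above leave us with: real/protected guests may
 * use 32-bit, PAE or AMD64 shadows; 32-bit guests use 32-bit or PAE shadows;
 * PAE guests use PAE shadows; AMD64 guests use AMD64 shadows; and any guest
 * mode may additionally be paired with a nested, EPT or NONE shadow type.
 */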
92
93/**
94 * Enters the shadow+guest mode.
95 *
96 * @returns VBox status code.
97 * @param pVCpu The cross context virtual CPU structure.
98 * @param GCPhysCR3 The physical address from the CR3 register.
99 */
100PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
101{
102 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches;
103 * other modes rely on MapCR3/UnmapCR3 to set up the shadow root page tables.
104 */
105#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
106 || PGM_SHW_TYPE == PGM_TYPE_PAE \
107 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
108 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
109 || PGM_GST_TYPE == PGM_TYPE_PROT))
110
111 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
112
113 Assert((HMIsNestedPagingActive(pVM) || VM_IS_NEM_ENABLED(pVM)) == pVM->pgm.s.fNestedPaging);
114 Assert(!pVM->pgm.s.fNestedPaging);
115
116 PGM_LOCK_VOID(pVM);
117 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
118 * but any calls to GC need a proper shadow page setup as well.
119 */
120 /* Free the previous root mapping if still active. */
121 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
122 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
123 if (pOldShwPageCR3)
124 {
125 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
126
127 /* Mark the page as unlocked; allow flushing again. */
128 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
129
130 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
131 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
132 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
133 }
134
135 /* construct a fake address. */
136 GCPhysCR3 = RT_BIT_64(63);
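    /* Real and protected mode have no guest CR3 to shadow, but pgmPoolAlloc
       keys pool pages by guest physical address, so a dummy key is needed.
       Bit 63 can never be a real guest physical address, which presumably is
       why it is used here: it cannot collide with an actual CR3 value. */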
137 PPGMPOOLPAGE pNewShwPageCR3;
138 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
139 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
140 &pNewShwPageCR3);
141 AssertRCReturn(rc, rc);
142
143 pVCpu->pgm.s.pShwPageCR3R3 = (R3PTRTYPE(PPGMPOOLPAGE))MMHyperCCToR3(pVM, pNewShwPageCR3);
144 pVCpu->pgm.s.pShwPageCR3R0 = (R0PTRTYPE(PPGMPOOLPAGE))MMHyperCCToR0(pVM, pNewShwPageCR3);
145
146 /* Mark the page as locked; disallow flushing. */
147 pgmPoolLockPage(pPool, pNewShwPageCR3);
148
149 /* Set the current hypervisor CR3. */
150 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
151
152 PGM_UNLOCK(pVM);
153 return rc;
154#else
155 NOREF(pVCpu); NOREF(GCPhysCR3);
156 return VINF_SUCCESS;
157#endif
158}
159
160
161#ifndef IN_RING3
162
163# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
164/**
165 * Deal with a guest page fault.
166 *
167 * @returns Strict VBox status code.
168 * @retval VINF_EM_RAW_GUEST_TRAP
169 * @retval VINF_EM_RAW_EMULATE_INSTR
170 *
171 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
172 * @param pGstWalk The guest page table walk result.
173 * @param uErr The error code.
174 */
175PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PGSTPTWALK pGstWalk, RTGCUINT uErr)
176{
177 /*
178 * Calc the error code for the guest trap.
179 */
180 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
181 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
182 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
183 if ( pGstWalk->Core.fRsvdError
184 || pGstWalk->Core.fBadPhysAddr)
185 {
186 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
187 Assert(!pGstWalk->Core.fNotPresent);
188 }
189 else if (!pGstWalk->Core.fNotPresent)
190 uNewErr |= X86_TRAP_PF_P;
191 TRPMSetErrorCode(pVCpu, uNewErr);
192
193 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pGstWalk->Core.GCPtr, uErr, pGstWalk->Core.uLevel));
194 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
195 return VINF_EM_RAW_GUEST_TRAP;
196}
197# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
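/*
 * For reference when reading the error code handling above and below: the x86
 * #PF error code bits used here are X86_TRAP_PF_P (bit 0, set when the page
 * was present, i.e. a protection violation rather than a not-present fault),
 * X86_TRAP_PF_RW (bit 1, write access), X86_TRAP_PF_US (bit 2, user-mode
 * access), X86_TRAP_PF_RSVD (bit 3, reserved bit violation) and
 * X86_TRAP_PF_ID (bit 4, instruction fetch).
 */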
198
199
200#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
201/**
202 * Deal with a guest page fault.
203 *
204 * The caller has taken the PGM lock.
205 *
206 * @returns Strict VBox status code.
207 *
208 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
209 * @param uErr The error code.
210 * @param pRegFrame The register frame.
211 * @param pvFault The fault address.
212 * @param pPage The guest page at @a pvFault.
213 * @param pGstWalk The guest page table walk result.
214 * @param pfLockTaken PGM lock taken here or not (out). This is true
215 * when we're called.
216 */
217static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
218 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
219# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
220 , PGSTPTWALK pGstWalk
221# endif
222 )
223{
224# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
225 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
226# endif
227 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
228 VBOXSTRICTRC rcStrict;
229
230 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
231 {
232 /*
233 * Physical page access handler.
234 */
235# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
236 const RTGCPHYS GCPhysFault = pGstWalk->Core.GCPhys;
237# else
238 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
239# endif
240 PPGMPHYSHANDLER pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
241 if (pCur)
242 {
243 PPGMPHYSHANDLERTYPEINT pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
244
245# ifdef PGM_SYNC_N_PAGES
246 /*
247 * If the region is write protected and we got a page not present fault, then sync
248 * the pages. If the fault was caused by a read, then restart the instruction.
249 * In case of write access continue to the GC write handler.
250 *
251 * ASSUMES that there is only one handler per page or that they have similar write properties.
252 */
253 if ( !(uErr & X86_TRAP_PF_P)
254 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
255 {
256# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
257 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
258# else
259 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
260# endif
261 if ( RT_FAILURE(rcStrict)
262 || !(uErr & X86_TRAP_PF_RW)
263 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
264 {
265 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
266 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
267 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
268 return rcStrict;
269 }
270 }
271# endif
272# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
273 /*
274 * If the access was not thru a #PF(RSVD|...) resync the page.
275 */
276 if ( !(uErr & X86_TRAP_PF_RSVD)
277 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
278# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
279 && pGstWalk->Core.fEffectiveRW
280 && !pGstWalk->Core.fEffectiveUS /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
281# endif
282 )
283 {
284# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
285 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
286# else
287 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
288# endif
289 if ( RT_FAILURE(rcStrict)
290 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
291 {
292 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
293 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
294 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
295 return rcStrict;
296 }
297 }
298# endif
299
300 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
301 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
302 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
303 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
304 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
305 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
306 else
307 {
308 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
309 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
310 }
311
312 if (pCurType->CTX_SUFF(pfnPfHandler))
313 {
314 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
315 void *pvUser = pCur->CTX_SUFF(pvUser);
316
317 STAM_PROFILE_START(&pCur->Stat, h);
318 if (pCur->hType != pPool->hAccessHandlerType)
319 {
320 PGM_UNLOCK(pVM);
321 *pfLockTaken = false;
322 }
323
324 rcStrict = pCurType->CTX_SUFF(pfnPfHandler)(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
325
326# ifdef VBOX_WITH_STATISTICS
327 PGM_LOCK_VOID(pVM);
328 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
329 if (pCur)
330 STAM_PROFILE_STOP(&pCur->Stat, h);
331 PGM_UNLOCK(pVM);
332# endif
333 }
334 else
335 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
336
337 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
338 return rcStrict;
339 }
340 }
341
342 /*
343 * There is a handled area of the page, but this fault doesn't belong to it.
344 * We must emulate the instruction.
345 *
346 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
347 * we first check if this was a page-not-present fault for a page with only
348 * write access handlers. Restart the instruction if it wasn't a write access.
349 */
350 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
351
352 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
353 && !(uErr & X86_TRAP_PF_P))
354 {
355# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
356 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
357# else
358 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
359# endif
360 if ( RT_FAILURE(rcStrict)
361 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
362 || !(uErr & X86_TRAP_PF_RW))
363 {
364 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
365 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
366 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
367 return rcStrict;
368 }
369 }
370
371 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
372 * It's writing to an unhandled part of the LDT page several million times.
373 */
374 rcStrict = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
375 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
376 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
377 return rcStrict;
378} /* if any kind of handler */
379# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
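/*
 * To recap the flow above: a registered physical access handler gets first
 * shot at the fault (after optionally resyncing write-protected or MMIO
 * pages), a fault that hits a page which merely contains a handled range but
 * falls outside it drops back to instruction emulation, and a handler without
 * a pfnPfHandler callback likewise ends up in VINF_EM_RAW_EMULATE_INSTR.
 */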
380
381
382/**
383 * \#PF Handler for raw-mode guest execution.
384 *
385 * @returns VBox status code (appropriate for trap handling and GC return).
386 *
387 * @param pVCpu The cross context virtual CPU structure.
388 * @param uErr The trap error code.
389 * @param pRegFrame Trap register frame.
390 * @param pvFault The fault address.
391 * @param pfLockTaken PGM lock taken here or not (out)
392 */
393PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
394{
395 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
396
397 *pfLockTaken = false;
398
399# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
400 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
401 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
402 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
403 && PGM_SHW_TYPE != PGM_TYPE_NONE
404 int rc;
405
406# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
407 /*
408 * Walk the guest page translation tables and check if it's a guest fault.
409 */
410 GSTPTWALK GstWalk;
411 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &GstWalk);
412 if (RT_FAILURE_NP(rc))
413 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
414
415 /* assert some GstWalk sanity. */
416# if PGM_GST_TYPE == PGM_TYPE_AMD64
417 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
418# endif
419# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
420 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
421# endif
422 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
423 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
424 Assert(GstWalk.Core.fSucceeded);
425
426 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
427 {
428 if ( ( (uErr & X86_TRAP_PF_RW)
429 && !GstWalk.Core.fEffectiveRW
430 && ( (uErr & X86_TRAP_PF_US)
431 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
432 || ((uErr & X86_TRAP_PF_US) && !GstWalk.Core.fEffectiveUS)
433 || ((uErr & X86_TRAP_PF_ID) && GstWalk.Core.fEffectiveNX)
434 )
435 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
436 }
437
438 /* Take the big lock now before we update flags. */
439 *pfLockTaken = true;
440 PGM_LOCK_VOID(pVM);
441
442 /*
443 * Set the accessed and dirty flags.
444 */
445 /** @todo Should probably use cmpxchg logic here as we're potentially racing
446 * other CPUs in SMP configs. (the lock isn't enough, since we take it
447 * after walking and the page tables could be stale already) */
448# if PGM_GST_TYPE == PGM_TYPE_AMD64
449 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
450 {
451 GstWalk.Pml4e.u |= X86_PML4E_A;
452 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
453 }
454 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
455 {
456 GstWalk.Pdpe.u |= X86_PDPE_A;
457 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
458 }
459# endif
460 if (GstWalk.Core.fBigPage)
461 {
462 Assert(GstWalk.Pde.u & X86_PDE_PS);
463 if (uErr & X86_TRAP_PF_RW)
464 {
465 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
466 {
467 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
468 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
469 }
470 }
471 else
472 {
473 if (!(GstWalk.Pde.u & X86_PDE4M_A))
474 {
475 GstWalk.Pde.u |= X86_PDE4M_A;
476 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
477 }
478 }
479 }
480 else
481 {
482 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
483 if (!(GstWalk.Pde.u & X86_PDE_A))
484 {
485 GstWalk.Pde.u |= X86_PDE_A;
486 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
487 }
488
489 if (uErr & X86_TRAP_PF_RW)
490 {
491# ifdef VBOX_WITH_STATISTICS
492 if (GstWalk.Pte.u & X86_PTE_D)
493 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
494 else
495 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
496# endif
497 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
498 {
499 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
500 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
501 }
502 }
503 else
504 {
505 if (!(GstWalk.Pte.u & X86_PTE_A))
506 {
507 GstWalk.Pte.u |= X86_PTE_A;
508 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
509 }
510 }
511 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
512 }
513 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
514 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
515# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
516 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
517
518 /* Take the big lock now. */
519 *pfLockTaken = true;
520 PGM_LOCK_VOID(pVM);
521# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
522
523# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
524 /*
525 * If it is a reserved bit fault we know that it is an MMIO (access
526 * handler) related fault and can skip some 200 lines of code.
527 */
528 if (uErr & X86_TRAP_PF_RSVD)
529 {
530 Assert(uErr & X86_TRAP_PF_P);
531 PPGMPAGE pPage;
532# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
533 rc = pgmPhysGetPageEx(pVM, GstWalk.Core.GCPhys, &pPage);
534 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
535 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
536 pfLockTaken, &GstWalk));
537 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
538# else
539 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
540 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
541 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
542 pfLockTaken));
543 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
544# endif
545 AssertRC(rc);
546 PGM_INVL_PG(pVCpu, pvFault);
547 return rc; /* Restart with the corrected entry. */
548 }
549# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
550
551 /*
552 * Fetch the guest PDE, PDPE and PML4E.
553 */
554# if PGM_SHW_TYPE == PGM_TYPE_32BIT
555 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
556 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
557
558# elif PGM_SHW_TYPE == PGM_TYPE_PAE
559 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
560 PX86PDPAE pPDDst;
561# if PGM_GST_TYPE == PGM_TYPE_PAE
562 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
563# else
564 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
565# endif
566 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
567
568# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
569 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
570 PX86PDPAE pPDDst;
571# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
572 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
573 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
574# else
575 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
576# endif
577 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
578
579# elif PGM_SHW_TYPE == PGM_TYPE_EPT
580 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
581 PEPTPD pPDDst;
582 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
583 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
584# endif
585 Assert(pPDDst);
586
587# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
588 /*
589 * Dirty page handling.
590 *
591 * If we successfully correct the write protection fault due to dirty bit
592 * tracking, then return immediately.
593 */
594 if (uErr & X86_TRAP_PF_RW) /* write fault? */
595 {
596 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
597 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
598 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
599 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
600 {
601 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
602 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
603 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
604 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
605 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
606 return VINF_SUCCESS;
607 }
608#ifdef DEBUG_bird
609 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
610 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
611#endif
612 }
613
614# if 0 /* rarely useful; leave for debugging. */
615 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
616# endif
617# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
618
619 /*
620 * A common case is the not-present error caused by lazy page table syncing.
621 *
622 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
623 * here so we can safely assume that the shadow PT is present when calling
624 * SyncPage later.
625 *
626 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
627 * of mapping conflict and defer to SyncCR3 in R3.
628 * (Again, we do NOT support access handlers for non-present guest pages.)
629 *
630 */
631# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
632 Assert(GstWalk.Pde.u & X86_PDE_P);
633# endif
634 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
635 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
636 {
637 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
638# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
639 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
640 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
641# else
642 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
643 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
644# endif
645 if (RT_SUCCESS(rc))
646 return rc;
647 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
648 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
649 return VINF_PGM_SYNC_CR3;
650 }
651
652 /*
653 * Check if this fault address is flagged for special treatment,
654 * which means we'll have to figure out the physical address and
655 * check flags associated with it.
656 *
657 * ASSUME that we can limit any special access handling to pages
658 * in page tables which the guest believes to be present.
659 */
660# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
661 RTGCPHYS GCPhys = GstWalk.Core.GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
662# else
663 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)PAGE_OFFSET_MASK);
664# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
665 PPGMPAGE pPage;
666 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
667 if (RT_FAILURE(rc))
668 {
669 /*
670 * When the guest accesses invalid physical memory (e.g. probing
671 * of RAM or accessing a remapped MMIO range), then we'll fall
672 * back to the recompiler to emulate the instruction.
673 */
674 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
675 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
676 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
677 return VINF_EM_RAW_EMULATE_INSTR;
678 }
679
680 /*
681 * Any handlers for this page?
682 */
683 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
684# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
685 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
686 &GstWalk));
687# else
688 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
689# endif
690
691 /*
692 * We are here only if page is present in Guest page tables and
693 * trap is not handled by our handlers.
694 *
695 * Check it for page out-of-sync situation.
696 */
697 if (!(uErr & X86_TRAP_PF_P))
698 {
699 /*
700 * Page is not present in our page tables. Try to sync it!
701 */
702 if (uErr & X86_TRAP_PF_US)
703 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
704 else /* supervisor */
705 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
706
707 if (PGM_PAGE_IS_BALLOONED(pPage))
708 {
709 /* Emulate reads from ballooned pages as they are not present in
710 our shadow page tables. (Required for e.g. Solaris guests; soft
711 ecc, random nr generator.) */
712 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
713 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
714 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
715 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
716 return rc;
717 }
718
719# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
720 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
721# else
722 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
723# endif
724 if (RT_SUCCESS(rc))
725 {
726 /* The page was successfully synced, return to the guest. */
727 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
728 return VINF_SUCCESS;
729 }
730 }
731 else /* uErr & X86_TRAP_PF_P: */
732 {
733 /*
734 * Write protected pages are made writable when the guest makes the
735 * first write to it. This happens for pages that are shared, write
736 * monitored or not yet allocated.
737 *
738 * We may also end up here when CR0.WP=0 in the guest.
739 *
740 * Also, a side effect of not flushing global PDEs are out of sync
741 * pages due to physical monitored regions, that are no longer valid.
742 * Assume for now it only applies to the read/write flag.
743 */
744 if (uErr & X86_TRAP_PF_RW)
745 {
746 /*
747 * Check if it is a read-only page.
748 */
749 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
750 {
751 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
752 Assert(!PGM_PAGE_IS_ZERO(pPage));
753 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
754 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
755
756 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
757 if (rc != VINF_SUCCESS)
758 {
759 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
760 return rc;
761 }
762 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
763 return VINF_EM_NO_MEMORY;
764 }
765
766# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
767 /*
768 * Check to see if we need to emulate the instruction if CR0.WP=0.
769 */
770 if ( !GstWalk.Core.fEffectiveRW
771 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
772 && CPUMGetGuestCPL(pVCpu) < 3)
773 {
774 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
775
776 /*
777 * The Netware WP0+RO+US hack.
778 *
779 * Netware sometimes(/always?) runs with WP0. It has been observed doing
780 * excessive write accesses to pages which are mapped with US=1 and RW=0
781 * while WP=0. This causes a lot of exits and extremely slow execution.
782 * To avoid trapping and emulating every write here, we change the shadow
783 * page table entry to map it as US=0 and RW=1 until user mode tries to
784 * access it again (see further below). We count these shadow page table
785 * changes so we can avoid having to clear the page pool every time the WP
786 * bit changes to 1 (see PGMCr0WpEnabled()).
787 */
788# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
789 if ( GstWalk.Core.fEffectiveUS
790 && !GstWalk.Core.fEffectiveRW
791 && (GstWalk.Core.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
792 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
793 {
794 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, GstWalk.Core.fBigPage));
795 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, GstWalk.Core.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
796 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
797 {
798 PGM_INVL_PG(pVCpu, pvFault);
799 pVCpu->pgm.s.cNetwareWp0Hacks++;
800 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
801 return rc;
802 }
803 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
804 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
805 }
806# endif
807
808 /* Interpret the access. */
809 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
810 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), GstWalk.Core.fBigPage, GstWalk.Core.fEffectiveUS));
811 if (RT_SUCCESS(rc))
812 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
813 else
814 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
815 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
816 return rc;
817 }
818# endif
819 /// @todo count the above case; else
820 if (uErr & X86_TRAP_PF_US)
821 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
822 else /* supervisor */
823 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
824
825 /*
826 * Sync the page.
827 *
828 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
829 * page is not present, which is not true in this case.
830 */
831# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
832 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
833# else
834 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
835# endif
836 if (RT_SUCCESS(rc))
837 {
838 /*
839 * Page was successfully synced, return to guest but invalidate
840 * the TLB first as the page is very likely to be in it.
841 */
842# if PGM_SHW_TYPE == PGM_TYPE_EPT
843 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
844# else
845 PGM_INVL_PG(pVCpu, pvFault);
846# endif
847# ifdef VBOX_STRICT
848 RTGCPHYS GCPhys2 = RTGCPHYS_MAX;
849 uint64_t fPageGst = UINT64_MAX;
850 if (!pVM->pgm.s.fNestedPaging)
851 {
852 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
853 AssertMsg(RT_SUCCESS(rc) && ((fPageGst & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, fPageGst));
854 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
855 }
856# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
857 uint64_t fPageShw = 0;
858 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
859 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
860 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GCPhys2, fPageGst, pvFault));
861# endif
862# endif /* VBOX_STRICT */
863 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
864 return VINF_SUCCESS;
865 }
866 }
867# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
868 /*
869 * Check for Netware WP0+RO+US hack from above and undo it when user
870 * mode accesses the page again.
871 */
872 else if ( GstWalk.Core.fEffectiveUS
873 && !GstWalk.Core.fEffectiveRW
874 && (GstWalk.Core.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
875 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
876 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
877 && CPUMGetGuestCPL(pVCpu) == 3
878 && pVM->cCpus == 1
879 )
880 {
881 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
882 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
883 if (RT_SUCCESS(rc))
884 {
885 PGM_INVL_PG(pVCpu, pvFault);
886 pVCpu->pgm.s.cNetwareWp0Hacks--;
887 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
888 return VINF_SUCCESS;
889 }
890 }
891# endif /* PGM_WITH_PAGING */
892
893 /** @todo else: why are we here? */
894
895# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
896 /*
897 * Check for VMM page flags vs. Guest page flags consistency.
898 * Currently only for debug purposes.
899 */
900 if (RT_SUCCESS(rc))
901 {
902 /* Get guest page flags. */
903 uint64_t fPageGst;
904 int rc2 = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
905 if (RT_SUCCESS(rc2))
906 {
907 uint64_t fPageShw = 0;
908 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
909
910#if 0
911 /*
912 * Compare page flags.
913 * Note: we have AVL, A, D bits desynced.
914 */
915 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
916 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
917 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
918 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
919 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
920 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
921 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
922 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
923 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
92401:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
92501:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
926
92701:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
92801:01:15.625516 00:08:43.268051 Location :
929e:\vbox\svn\trunk\srcPage flags mismatch!
930pvFault=fffff801b0d7b000
931 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
932GCPhys=0000000019b52000
933fPageShw=0
934fPageGst=77b0000000000121
935rc=0
936#endif
937
938 }
939 else
940 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
941 }
942 else
943 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
944# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
945 }
946
947
948 /*
949 * If we get here it is because something failed above, i.e. most likely guru
950 * meditation time.
951 */
952 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
953 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs.Sel, pRegFrame->rip));
954 return rc;
955
956# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
957 NOREF(uErr); NOREF(pRegFrame); NOREF(pvFault);
958 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
959 return VERR_PGM_NOT_USED_IN_MODE;
960# endif
961}
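/*
 * The main return values of Trap0eHandler: VINF_SUCCESS resumes the guest
 * after the shadow tables were fixed up, VINF_EM_RAW_GUEST_TRAP reflects the
 * fault back into the guest, VINF_EM_RAW_EMULATE_INSTR asks the caller to
 * emulate the faulting instruction, and VINF_PGM_SYNC_CR3 forces a full
 * shadow table resync in ring-3.
 */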
962
963#endif /* !IN_RING3 */
964
965
966/**
967 * Emulation of the invlpg instruction.
968 *
969 *
970 * @returns VBox status code.
971 *
972 * @param pVCpu The cross context virtual CPU structure.
973 * @param GCPtrPage Page to invalidate.
974 *
975 * @remark ASSUMES that the guest is updating before invalidating. This order
976 * isn't required by the CPU, so this is speculative and could cause
977 * trouble.
978 * @remark No TLB shootdown is done on any other VCPU as we assume that
979 * invlpg emulation is the *only* reason for calling this function.
980 * (The guest has to shoot down TLB entries on other CPUs itself)
981 * Currently true, but keep in mind!
982 *
983 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
984 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
985 */
986PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
987{
988#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
989 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
990 && PGM_SHW_TYPE != PGM_TYPE_NONE
991 int rc;
992 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
993 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
994
995 PGM_LOCK_ASSERT_OWNER(pVM);
996
997 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
998
999 /*
1000 * Get the shadow PD entry and skip out if this PD isn't present.
1001 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1002 */
1003# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1004 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1005 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1006
1007 /* Fetch the pgm pool shadow descriptor. */
1008 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1009# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1010 if (!pShwPde)
1011 {
1012 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1013 return VINF_SUCCESS;
1014 }
1015# else
1016 Assert(pShwPde);
1017# endif
1018
1019# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1020 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1021 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1022
1023 /* If the shadow PDPE isn't present, then skip the invalidate. */
1024# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1025 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1026# else
1027 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1028# endif
1029 {
1030 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1031 PGM_INVL_PG(pVCpu, GCPtrPage);
1032 return VINF_SUCCESS;
1033 }
1034
1035 /* Fetch the pgm pool shadow descriptor. */
1036 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1037 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1038
1039 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1040 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1041 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1042
1043# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1044 /* PML4 */
1045 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1046 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1047 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1048 PX86PDPAE pPDDst;
1049 PX86PDPT pPdptDst;
1050 PX86PML4E pPml4eDst;
1051 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1052 if (rc != VINF_SUCCESS)
1053 {
1054 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1055 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1056 PGM_INVL_PG(pVCpu, GCPtrPage);
1057 return VINF_SUCCESS;
1058 }
1059 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1060 Assert(pPDDst);
1061 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1062
1063 /* Fetch the pgm pool shadow descriptor. */
1064 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1065 Assert(pShwPde);
1066
1067# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1068
1069 const SHWPDE PdeDst = *pPdeDst;
1070 if (!(PdeDst.u & X86_PDE_P))
1071 {
1072 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1073 PGM_INVL_PG(pVCpu, GCPtrPage);
1074 return VINF_SUCCESS;
1075 }
1076
1077 /*
1078 * Get the guest PD entry and calc big page.
1079 */
1080# if PGM_GST_TYPE == PGM_TYPE_32BIT
1081 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1082 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1083 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1084# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1085 unsigned iPDSrc = 0;
1086# if PGM_GST_TYPE == PGM_TYPE_PAE
1087 X86PDPE PdpeSrcIgn;
1088 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1089# else /* AMD64 */
1090 PX86PML4E pPml4eSrcIgn;
1091 X86PDPE PdpeSrcIgn;
1092 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1093# endif
1094 GSTPDE PdeSrc;
1095
1096 if (pPDSrc)
1097 PdeSrc = pPDSrc->a[iPDSrc];
1098 else
1099 PdeSrc.u = 0;
1100# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1101 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1102 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1103 if (fWasBigPage != fIsBigPage)
1104 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1105
1106# ifdef IN_RING3
1107 /*
1108 * If a CR3 Sync is pending we may ignore the invalidate page operation
1109 * depending on the kind of sync and if it's a global page or not.
1110 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1111 */
1112# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1113 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1114 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1115 && fIsBigPage
1116 && (PdeSrc.u & X86_PDE4M_G)
1117 )
1118 )
1119# else
1120 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1121# endif
1122 {
1123 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1124 return VINF_SUCCESS;
1125 }
1126# endif /* IN_RING3 */
1127
1128 /*
1129 * Deal with the Guest PDE.
1130 */
1131 rc = VINF_SUCCESS;
1132 if (PdeSrc.u & X86_PDE_P)
1133 {
1134 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1135 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1136 if (!fIsBigPage)
1137 {
1138 /*
1139 * 4KB - page.
1140 */
1141 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1142 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1143
1144# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1145 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1146 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (PAGE_SIZE / 2)));
1147# endif
1148 if (pShwPage->GCPhys == GCPhys)
1149 {
1150 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1151 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1152
1153 PGSTPT pPTSrc;
1154 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1155 if (RT_SUCCESS(rc))
1156 {
1157 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1158 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1159 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1160 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1161 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1162 GCPtrPage, PteSrc.u & X86_PTE_P,
1163 (PteSrc.u & PdeSrc.u & X86_PTE_RW),
1164 (PteSrc.u & PdeSrc.u & X86_PTE_US),
1165 (uint64_t)PteSrc.u,
1166 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1167 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1168 }
1169 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1170 PGM_INVL_PG(pVCpu, GCPtrPage);
1171 }
1172 else
1173 {
1174 /*
1175 * The page table address changed.
1176 */
1177 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1178 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1179 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1180 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1181 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1182 PGM_INVL_VCPU_TLBS(pVCpu);
1183 }
1184 }
1185 else
1186 {
1187 /*
1188 * 2/4MB - page.
1189 */
1190 /* Before freeing the page, check if anything really changed. */
1191 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1192 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1193# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1194 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1195 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1196# endif
1197 if ( pShwPage->GCPhys == GCPhys
1198 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1199 {
1200 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1201 /** @todo This test is wrong as it cannot check the G bit!
1202 * FIXME */
1203 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1204 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1205 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1206 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1207 {
1208 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1209 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1210 return VINF_SUCCESS;
1211 }
1212 }
1213
1214 /*
1215 * Ok, the page table is present and it's been changed in the guest.
1216 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1217 * We could do this for some flushes in GC too, but we need an algorithm for
1218 * deciding which 4MB pages containing code likely to be executed very soon.
1219 */
1220 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1221 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1222 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1223 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1224 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1225 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1226 }
1227 }
1228 else
1229 {
1230 /*
1231 * Page directory is not present, mark shadow PDE not present.
1232 */
1233 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1234 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1235 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1236 PGM_INVL_PG(pVCpu, GCPtrPage);
1237 }
1238 return rc;
1239
1240#else /* guest real and protected mode, nested + ept, none. */
1241 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1242 NOREF(pVCpu); NOREF(GCPtrPage);
1243 return VINF_SUCCESS;
1244#endif
1245}
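/*
 * Note that InvalidatePage never modifies the guest structures; it only
 * resyncs the affected shadow PTE or drops the shadow page table and lets the
 * next #PF rebuild the mapping lazily, which is why most of the exits above
 * simply bump an InvalidatePage* statistics counter and return VINF_SUCCESS.
 */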
1246
1247#if PGM_SHW_TYPE != PGM_TYPE_NONE
1248
1249/**
1250 * Update the tracking of shadowed pages.
1251 *
1252 * @param pVCpu The cross context virtual CPU structure.
1253 * @param pShwPage The shadow page.
1254 * @param HCPhys The physical page that is being dereferenced.
1255 * @param iPte Shadow PTE index
1256 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1257 */
1258DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1259 RTGCPHYS GCPhysPage)
1260{
1261 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1262
1263# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1264 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1265 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1266
1267 /* Use the hint we retrieved from the cached guest PT. */
1268 if (pShwPage->fDirty)
1269 {
1270 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1271
1272 Assert(pShwPage->cPresent);
1273 Assert(pPool->cPresent);
1274 pShwPage->cPresent--;
1275 pPool->cPresent--;
1276
1277 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1278 AssertRelease(pPhysPage);
1279 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1280 return;
1281 }
1282# else
1283 NOREF(GCPhysPage);
1284# endif
1285
1286 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1287 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1288
1289 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1290 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1291 * 2. write protect all shadowed pages. I.e. implement caching.
1292 */
1293 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1294
1295 /*
1296 * Find the guest address.
1297 */
1298 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1299 pRam;
1300 pRam = pRam->CTX_SUFF(pNext))
1301 {
1302 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1303 while (iPage-- > 0)
1304 {
1305 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1306 {
1307 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1308
1309 Assert(pShwPage->cPresent);
1310 Assert(pPool->cPresent);
1311 pShwPage->cPresent--;
1312 pPool->cPresent--;
1313
1314 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1315 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1316 return;
1317 }
1318 }
1319 }
1320
1321 for (;;)
1322 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1323}
1324
1325
1326/**
1327 * Update the tracking of shadowed pages.
1328 *
1329 * @param pVCpu The cross context virtual CPU structure.
1330 * @param pShwPage The shadow page.
1331 * @param u16 The top 16-bit of the pPage->HCPhys.
1332 * @param pPage Pointer to the guest page. This will be modified.
1333 * @param iPTDst The index into the shadow table.
1334 */
1335DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1336{
1337 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1338
1339 /*
1340 * Just deal with the simple first time here.
1341 */
1342 if (!u16)
1343 {
1344 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1345 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1346 /* Save the page table index. */
1347 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1348 }
1349 else
1350 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1351
1352 /* write back */
1353 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1354 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1355
1356 /* update statistics. */
1357 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1358 pShwPage->cPresent++;
1359 if (pShwPage->iFirstPresent > iPTDst)
1360 pShwPage->iFirstPresent = iPTDst;
1361}
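/* Roughly speaking, the 16-bit tracking value manipulated above packs a
   reference count and a shadow pool page index (see PGMPOOL_TD_MAKE); once
   more than one shadow page table references the physical page,
   pgmPoolTrackPhysExtAddref moves the references to an extent list instead. */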
1362
1363
1364/**
1365 * Modifies a shadow PTE to account for access handlers.
1366 *
1367 * @param pVM The cross context VM structure.
1368 * @param pPage The page in question.
1369 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1370 * A (accessed) bit so it can be emulated correctly.
1371 * @param pPteDst The shadow PTE (output). This is temporary storage and
1372 * does not need to be set atomically.
1373 */
1374DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1375{
1376 NOREF(pVM); RT_NOREF_PV(fPteSrc);
1377
1378 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1379 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1380 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1381 {
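        /* Only write handlers are active for this page (callers only invoke us when some
           handler is registered), so keep the page readable but write protect it. */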
1382 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1383# if PGM_SHW_TYPE == PGM_TYPE_EPT
1384 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_TYPE_WB | EPT_E_IGNORE_PAT;
1385# else
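        /* Only map the page if the guest has already marked it accessed; otherwise leave the
           shadow PTE empty so the A bit can still be emulated on the next access. */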
1386 if (fPteSrc & X86_PTE_A)
1387 {
1388 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1389 SHW_PTE_SET_RO(*pPteDst);
1390 }
1391 else
1392 SHW_PTE_SET(*pPteDst, 0);
1393# endif
1394 }
1395# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1396# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1397 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1398 && ( BTH_IS_NP_ACTIVE(pVM)
1399 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1400# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1401 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1402# endif
1403 )
1404 {
1405 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1406# if PGM_SHW_TYPE == PGM_TYPE_EPT
1407 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1408 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1409 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1410 | EPT_E_WRITE
1411 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1412 | EPT_E_TYPE_INVALID_3;
1413# else
1414 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1415 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1416# endif
1417 }
1418# endif
1419# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1420 else
1421 {
1422 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1423 SHW_PTE_SET(*pPteDst, 0);
1424 }
1425 /** @todo count these kinds of entries. */
1426}
1427
1428
1429/**
1430 * Creates a 4K shadow page for a guest page.
1431 *
1432 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1433 * physical address. Only the flags of the PdeSrc argument are used. No page
1434 * structures will be mapped in this function.
1435 *
1436 * @param pVCpu The cross context virtual CPU structure.
1437 * @param pPteDst Destination page table entry.
1438 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1439 * Can safely assume that only the flags are being used.
1440 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1441 * @param pShwPage Pointer to the shadow page.
1442 * @param iPTDst The index into the shadow table.
1443 *
1444 * @remark Not used for 2/4MB pages!
1445 */
1446# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1447static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1448 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1449# else
1450static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1451 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1452# endif
1453{
1454 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1455 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
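    /* GCPhysOldPage is only set by the dirty page table code below; it is passed to
       SyncPageWorkerTrackDeref as a hint so the slow HCPhys -> guest page lookup can be
       skipped when an existing shadow PTE gets replaced. */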
1456
1457# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1458 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1459 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1460
1461 if (pShwPage->fDirty)
1462 {
1463 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1464 PGSTPT pGstPT;
1465
1466 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1467 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1468 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1469 pGstPT->a[iPTDst].u = PteSrc.u;
1470 }
1471# else
1472 Assert(!pShwPage->fDirty);
1473# endif
1474
1475# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1476 if ( (PteSrc.u & X86_PTE_P)
1477 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1478# endif
1479 {
1480# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1481 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1482# endif
1483 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1484
1485 /*
1486 * Find the ram range.
1487 */
1488 PPGMPAGE pPage;
1489 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1490 if (RT_SUCCESS(rc))
1491 {
1492 /* Ignore ballooned pages.
1493 Don't return errors or use a fatal assert here as part of a
1494 shadow sync range might include ballooned pages. */
1495 if (PGM_PAGE_IS_BALLOONED(pPage))
1496 {
1497 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1498 return;
1499 }
1500
1501# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1502 /* Make the page writable if necessary. */
1503 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1504 && ( PGM_PAGE_IS_ZERO(pPage)
1505# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1506 || ( (PteSrc.u & X86_PTE_RW)
1507# else
1508 || ( 1
1509# endif
1510 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1511# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1512 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1513# endif
1514# ifdef VBOX_WITH_PAGE_SHARING
1515 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1516# endif
1517 )
1518 )
1519 )
1520 {
1521 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1522 AssertRC(rc);
1523 }
1524# endif
1525
1526 /*
1527 * Make page table entry.
1528 */
1529 SHWPTE PteDst;
1530# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1531 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1532# else
1533 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1534# endif
1535 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1536 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1537 else
1538 {
1539# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1540 /*
1541 * If the page or page directory entry is not marked accessed,
1542 * we mark the page not present.
1543 */
1544 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1545 {
1546 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1547 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1548 SHW_PTE_SET(PteDst, 0);
1549 }
1550 /*
1551 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1552 * when the page is modified.
1553 */
1554 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
1555 {
1556 AssertCompile(X86_PTE_RW == X86_PDE_RW);
1557 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
1558 SHW_PTE_SET(PteDst,
1559 fGstShwPteFlags
1560 | PGM_PAGE_GET_HCPHYS(pPage)
1561 | PGM_PTFLAGS_TRACK_DIRTY);
1562 SHW_PTE_SET_RO(PteDst);
1563 }
1564 else
1565# endif
1566 {
1567 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
1568# if PGM_SHW_TYPE == PGM_TYPE_EPT
1569 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
1570 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_TYPE_WB | EPT_E_IGNORE_PAT;
1571# else
1572 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1573# endif
1574 }
1575
1576 /*
1577 * Make sure only allocated pages are mapped writable.
1578 */
1579 if ( SHW_PTE_IS_P_RW(PteDst)
1580 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1581 {
1582 /* Still applies to shared pages. */
1583 Assert(!PGM_PAGE_IS_ZERO(pPage));
1584 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why isn't it? */
1585 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1586 }
1587 }
1588
1589 /*
1590 * Keep user track up to date.
1591 */
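        /* Three cases are handled: a brand new reference (addref), a replaced page (deref the
           old one, addref the new one), and, in the else branch below, an entry going from
           present to not present (deref only).  An unchanged entry needs nothing. */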
1592 if (SHW_PTE_IS_P(PteDst))
1593 {
1594 if (!SHW_PTE_IS_P(*pPteDst))
1595 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1596 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1597 {
1598 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1599 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1600 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1601 }
1602 }
1603 else if (SHW_PTE_IS_P(*pPteDst))
1604 {
1605 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1606 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1607 }
1608
1609 /*
1610 * Update statistics and commit the entry.
1611 */
1612# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1613 if (!(PteSrc.u & X86_PTE_G))
1614 pShwPage->fSeenNonGlobal = true;
1615# endif
1616 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1617 return;
1618 }
1619
1620/** @todo count these three different kinds. */
1621 Log2(("SyncPageWorker: invalid address in Pte\n"));
1622 }
1623# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1624 else if (!(PteSrc.u & X86_PTE_P))
1625 Log2(("SyncPageWorker: page not present in Pte\n"));
1626 else
1627 Log2(("SyncPageWorker: invalid Pte\n"));
1628# endif
1629
1630 /*
1631 * The page is not present or the PTE is bad. Replace the shadow PTE by
1632 * an empty entry, making sure to keep the user tracking up to date.
1633 */
1634 if (SHW_PTE_IS_P(*pPteDst))
1635 {
1636 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1637 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1638 }
1639 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1640}
1641
1642
1643/**
1644 * Syncs a guest OS page.
1645 *
1646 * There are no conflicts at this point, neither is there any need for
1647 * page table allocations.
1648 *
1649 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1650 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1651 *
1652 * @returns VBox status code.
1653 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1654 * @param pVCpu The cross context virtual CPU structure.
1655 * @param PdeSrc Page directory entry of the guest.
1656 * @param GCPtrPage Guest context page address.
1657 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1658 * @param uErr Fault error (X86_TRAP_PF_*).
1659 */
1660static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1661{
1662 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1663 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
1664 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1665 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
1666
1667 PGM_LOCK_ASSERT_OWNER(pVM);
1668
1669# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1670 || PGM_GST_TYPE == PGM_TYPE_PAE \
1671 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1672 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
1673
1674 /*
1675 * Assert preconditions.
1676 */
1677 Assert(PdeSrc.u & X86_PDE_P);
1678 Assert(cPages);
1679# if 0 /* rarely useful; leave for debugging. */
1680 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1681# endif
1682
1683 /*
1684 * Get the shadow PDE, find the shadow page table in the pool.
1685 */
1686# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1687 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1688 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1689
1690 /* Fetch the pgm pool shadow descriptor. */
1691 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1692 Assert(pShwPde);
1693
1694# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1695 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1696 PPGMPOOLPAGE pShwPde = NULL;
1697 PX86PDPAE pPDDst;
1698
1699 /* Fetch the pgm pool shadow descriptor. */
1700 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1701 AssertRCSuccessReturn(rc2, rc2);
1702 Assert(pShwPde);
1703
1704 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1705 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1706
1707# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1708 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1709 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1710 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1711 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1712
1713 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1714 AssertRCSuccessReturn(rc2, rc2);
1715 Assert(pPDDst && pPdptDst);
1716 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1717# endif
1718 SHWPDE PdeDst = *pPdeDst;
1719
1720 /*
1721 * - In the guest SMP case we could have blocked while another VCPU reused
1722 * this page table.
1723 * - With W7-64 we may also take this path when the A bit is cleared on
1724 * higher level tables (PDPE/PML4E). The guest does not invalidate the
1725 * relevant TLB entries. If we're write monitoring any page mapped by
1726 * the modified entry, we may end up here with a "stale" TLB entry.
1727 */
1728 if (!(PdeDst.u & X86_PDE_P))
1729 {
1730 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1731 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
1732 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1733 if (uErr & X86_TRAP_PF_P)
1734 PGM_INVL_PG(pVCpu, GCPtrPage);
1735 return VINF_SUCCESS; /* force the instruction to be executed again. */
1736 }
1737
1738 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1739 Assert(pShwPage);
1740
1741# if PGM_GST_TYPE == PGM_TYPE_AMD64
1742 /* Fetch the pgm pool shadow descriptor. */
1743 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1744 Assert(pShwPde);
1745# endif
1746
1747 /*
1748 * Check that the page is present and that the shadow PDE isn't out of sync.
1749 */
1750 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1751 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1752 RTGCPHYS GCPhys;
1753 if (!fBigPage)
1754 {
1755 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1756# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1757 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1758 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (PAGE_SIZE / 2)));
1759# endif
1760 }
1761 else
1762 {
1763 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1764# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1765 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1766 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1767# endif
1768 }
1769 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
1770 if ( fPdeValid
1771 && pShwPage->GCPhys == GCPhys
1772 && (PdeSrc.u & X86_PDE_P)
1773 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1774 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
1775# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1776 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
1777# endif
1778 )
1779 {
1780 /*
1781 * Check that the PDE is marked accessed already.
1782 * Since we set the accessed bit *before* getting here on a #PF, this
1783 * check is only meant for dealing with non-#PF'ing paths.
1784 */
1785 if (PdeSrc.u & X86_PDE_A)
1786 {
1787 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1788 if (!fBigPage)
1789 {
1790 /*
1791 * 4KB Page - Map the guest page table.
1792 */
1793 PGSTPT pPTSrc;
1794 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1795 if (RT_SUCCESS(rc))
1796 {
1797# ifdef PGM_SYNC_N_PAGES
1798 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1799 if ( cPages > 1
1800 && !(uErr & X86_TRAP_PF_P)
1801 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
1802 {
1803 /*
1804 * This code path is currently only taken when the caller is PGMTrap0eHandler
1805 * for non-present pages!
1806 *
1807 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1808 * deal with locality.
1809 */
1810 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1811# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1812 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1813 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1814# else
1815 const unsigned offPTSrc = 0;
1816# endif
1817 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1818 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1819 iPTDst = 0;
1820 else
1821 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1822
1823 for (; iPTDst < iPTDstEnd; iPTDst++)
1824 {
1825 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
1826
1827 if ( (pPteSrc->u & X86_PTE_P)
1828 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1829 {
1830 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1831 NOREF(GCPtrCurPage);
1832 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
1833 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1834 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
1835 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
1836 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
1837 (uint64_t)pPteSrc->u,
1838 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1839 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1840 }
1841 }
1842 }
1843 else
1844# endif /* PGM_SYNC_N_PAGES */
1845 {
1846 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1847 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1848 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1849 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1850 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1851 GCPtrPage, PteSrc.u & X86_PTE_P,
1852 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
1853 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
1854 (uint64_t)PteSrc.u,
1855 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1856 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1857 }
1858 }
1859 else /* MMIO or invalid page: emulated in #PF handler. */
1860 {
1861 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1862 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
1863 }
1864 }
1865 else
1866 {
1867 /*
1868 * 4/2MB page - lazy syncing shadow 4K pages.
1869 * (There are many causes of getting here, it's no longer only CSAM.)
1870 */
1871 /* Calculate the GC physical address of this 4KB shadow page. */
1872 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
1873 /* Find ram range. */
1874 PPGMPAGE pPage;
1875 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
1876 if (RT_SUCCESS(rc))
1877 {
1878 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1879
1880# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1881 /* Try to make the page writable if necessary. */
1882 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1883 && ( PGM_PAGE_IS_ZERO(pPage)
1884 || ( (PdeSrc.u & X86_PDE_RW)
1885 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1886# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1887 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1888# endif
1889# ifdef VBOX_WITH_PAGE_SHARING
1890 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1891# endif
1892 )
1893 )
1894 )
1895 {
1896 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1897 AssertRC(rc);
1898 }
1899# endif
1900
1901 /*
1902 * Make shadow PTE entry.
1903 */
1904 SHWPTE PteDst;
1905 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1906 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
1907 else
1908 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
1909
1910 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1911 if ( SHW_PTE_IS_P(PteDst)
1912 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1913 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1914
1915 /* Make sure only allocated pages are mapped writable. */
1916 if ( SHW_PTE_IS_P_RW(PteDst)
1917 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1918 {
1919 /* Still applies to shared pages. */
1920 Assert(!PGM_PAGE_IS_ZERO(pPage));
1921 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
1922 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1923 }
1924
1925 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
1926
1927 /*
1928 * If the page is not flagged as dirty and is writable, then make it read-only
1929 * at PD level, so we can set the dirty bit when the page is modified.
1930 *
1931 * ASSUMES that page access handlers are implemented on page table entry level.
1932 * Thus we will first catch the dirty access and set PDE.D and restart. If
1933 * there is an access handler, we'll trap again and let it work on the problem.
1934 */
1935 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1936 * As for invlpg, it simply frees the whole shadow PT.
1937 * ...It's possibly because the guest clears it without really telling us... */
1938 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
1939 {
1940 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
1941 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1942 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
1943 }
1944 else
1945 {
1946 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
1947 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
1948 }
1949 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
1950 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1951 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
1952 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1953 }
1954 else
1955 {
1956 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1957 /** @todo must wipe the shadow page table entry in this
1958 * case. */
1959 }
1960 }
1961 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
1962 return VINF_SUCCESS;
1963 }
1964
1965 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
1966 }
1967 else if (fPdeValid)
1968 {
1969 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1970 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1971 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1972 }
1973 else
1974 {
1975/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
1976 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1977 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1978 }
1979
1980 /*
1981 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1982 * Yea, I'm lazy.
1983 */
1984 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1985 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1986
1987 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
1988 PGM_INVL_VCPU_TLBS(pVCpu);
1989 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1990
1991
1992# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1993 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
1994 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
1995 NOREF(PdeSrc);
1996
1997# ifdef PGM_SYNC_N_PAGES
1998 /*
1999 * Get the shadow PDE, find the shadow page table in the pool.
2000 */
2001# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2002 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2003
2004# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2005 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2006
2007# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2008 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2009 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2010 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2011 X86PDEPAE PdeDst;
2012 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2013
2014 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2015 AssertRCSuccessReturn(rc, rc);
2016 Assert(pPDDst && pPdptDst);
2017 PdeDst = pPDDst->a[iPDDst];
2018
2019# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2020 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2021 PEPTPD pPDDst;
2022 EPTPDE PdeDst;
2023
2024 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2025 if (rc != VINF_SUCCESS)
2026 {
2027 AssertRC(rc);
2028 return rc;
2029 }
2030 Assert(pPDDst);
2031 PdeDst = pPDDst->a[iPDDst];
2032# endif
2033 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2034 if (!SHW_PDE_IS_P(PdeDst))
2035 {
2036 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2037 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2038 return VINF_SUCCESS; /* force the instruction to be executed again. */
2039 }
2040
2041 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2042 if (SHW_PDE_IS_BIG(PdeDst))
2043 {
2044 Assert(pVM->pgm.s.fNestedPaging);
2045 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2046 return VINF_SUCCESS;
2047 }
2048
2049 /* Mask away the page offset. */
2050 GCPtrPage &= ~((RTGCPTR)0xfff);
2051
2052 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2053 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2054
2055 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2056 if ( cPages > 1
2057 && !(uErr & X86_TRAP_PF_P)
2058 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2059 {
2060 /*
2061 * This code path is currently only taken when the caller is PGMTrap0eHandler
2062 * for non-present pages!
2063 *
2064 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2065 * deal with locality.
2066 */
2067 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2068 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2069 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2070 iPTDst = 0;
2071 else
2072 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2073 for (; iPTDst < iPTDstEnd; iPTDst++)
2074 {
2075 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2076 {
2077 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2078 | (iPTDst << PAGE_SHIFT));
2079
2080 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2081 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2082 GCPtrCurPage,
2083 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2084 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2085
2086 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2087 break;
2088 }
2089 else
2090 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2091 }
2092 }
2093 else
2094# endif /* PGM_SYNC_N_PAGES */
2095 {
2096 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2097 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2098 | (iPTDst << PAGE_SHIFT));
2099
2100 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2101
2102 Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2103 GCPtrPage,
2104 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2105 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2106 }
2107 return VINF_SUCCESS;
2108
2109# else
2110 NOREF(PdeSrc);
2111 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2112 return VERR_PGM_NOT_USED_IN_MODE;
2113# endif
2114}
2115
2116#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2117#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
2118
2119/**
2120 * CheckPageFault helper for returning a page fault indicating a non-present
2121 * (NP) entry in the page translation structures.
2122 *
2123 * @returns VINF_EM_RAW_GUEST_TRAP.
2124 * @param pVCpu The cross context virtual CPU structure.
2125 * @param uErr The error code of the shadow fault. Corrections to
2126 * TRPM's copy will be made if necessary.
2127 * @param GCPtrPage For logging.
2128 * @param uPageFaultLevel For logging.
2129 */
2130DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2131{
2132 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2133 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2134 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2135 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2136 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2137
2138 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2139 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2140 return VINF_EM_RAW_GUEST_TRAP;
2141}
2142
2143
2144/**
2145 * CheckPageFault helper for returning a page fault indicating a reserved bit
2146 * (RSVD) error in the page translation structures.
2147 *
2148 * @returns VINF_EM_RAW_GUEST_TRAP.
2149 * @param pVCpu The cross context virtual CPU structure.
2150 * @param uErr The error code of the shadow fault. Corrections to
2151 * TRPM's copy will be made if necessary.
2152 * @param GCPtrPage For logging.
2153 * @param uPageFaultLevel For logging.
2154 */
2155DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2156{
2157 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2158 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2159 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2160
2161 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2162 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2163 return VINF_EM_RAW_GUEST_TRAP;
2164}
2165
2166
2167/**
2168 * CheckPageFault helper for returning a page protection fault (P).
2169 *
2170 * @returns VINF_EM_RAW_GUEST_TRAP.
2171 * @param pVCpu The cross context virtual CPU structure.
2172 * @param uErr The error code of the shadow fault. Corrections to
2173 * TRPM's copy will be made if necessary.
2174 * @param GCPtrPage For logging.
2175 * @param uPageFaultLevel For logging.
2176 */
2177DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2178{
2179 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2180 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2181 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2182 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2183
2184 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2185 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2186 return VINF_EM_RAW_GUEST_TRAP;
2187}
2188
2189
2190/**
2191 * Handle dirty bit tracking faults.
2192 *
2193 * @returns VBox status code: VINF_PGM_HANDLED_DIRTY_BIT_FAULT if the fault was caused by our dirty-bit tracking and has been handled, VINF_PGM_NO_DIRTY_BIT_TRACKING if it was not, or a failure status.
2194 * @param pVCpu The cross context virtual CPU structure.
2195 * @param uErr Page fault error code.
2196 * @param pPdeSrc Guest page directory entry.
2197 * @param pPdeDst Shadow page directory entry.
2198 * @param GCPtrPage Guest context page address.
2199 */
2200static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
2201 RTGCPTR GCPtrPage)
2202{
2203 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2204 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2205 NOREF(uErr);
2206
2207 PGM_LOCK_ASSERT_OWNER(pVM);
2208
2209 /*
2210 * Handle big page.
2211 */
2212 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
2213 {
2214 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2215 {
2216 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2217 Assert(pPdeSrc->u & X86_PDE_RW);
2218
2219 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2220 * fault again and take this path to only invalidate the entry (see below). */
2221 SHWPDE PdeDst = *pPdeDst;
2222 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
2223 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
2224 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2225 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2226 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2227 }
2228
2229# ifdef IN_RING0
2230 /* Check for stale TLB entry; only applies to the SMP guest case. */
2231 if ( pVM->cCpus > 1
2232 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
2233 {
2234 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2235 if (pShwPage)
2236 {
2237 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2238 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2239 if (SHW_PTE_IS_P_RW(*pPteDst))
2240 {
2241 /* Stale TLB entry. */
2242 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2243 PGM_INVL_PG(pVCpu, GCPtrPage);
2244 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2245 }
2246 }
2247 }
2248# endif /* IN_RING0 */
2249 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2250 }
2251
2252 /*
2253 * Map the guest page table.
2254 */
2255 PGSTPT pPTSrc;
2256 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2257 AssertRCReturn(rc, rc);
2258
2259 if (SHW_PDE_IS_P(*pPdeDst))
2260 {
2261 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2262 const GSTPTE PteSrc = *pPteSrc;
2263
2264 /*
2265 * Map shadow page table.
2266 */
2267 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2268 if (pShwPage)
2269 {
2270 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2271 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2272 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2273 {
2274 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2275 {
2276 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
2277 SHWPTE PteDst = *pPteDst;
2278
2279 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2280 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2281
2282 Assert(PteSrc.u & X86_PTE_RW);
2283
2284 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2285 * entry will not harm; write access will simply fault again and
2286 * take this path to only invalidate the entry.
2287 */
2288 if (RT_LIKELY(pPage))
2289 {
2290 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2291 {
2292 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2293 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2294 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2295 SHW_PTE_SET_RO(PteDst);
2296 }
2297 else
2298 {
2299 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2300 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2301 {
2302 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
2303 AssertRC(rc);
2304 }
2305 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2306 SHW_PTE_SET_RW(PteDst);
2307 else
2308 {
2309 /* Still applies to shared pages. */
2310 Assert(!PGM_PAGE_IS_ZERO(pPage));
2311 SHW_PTE_SET_RO(PteDst);
2312 }
2313 }
2314 }
2315 else
2316 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2317
2318 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2319 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2320 PGM_INVL_PG(pVCpu, GCPtrPage);
2321 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2322 }
2323
2324# ifdef IN_RING0
2325 /* Check for stale TLB entry; only applies to the SMP guest case. */
2326 if ( pVM->cCpus > 1
2327 && SHW_PTE_IS_RW(*pPteDst)
2328 && SHW_PTE_IS_A(*pPteDst))
2329 {
2330 /* Stale TLB entry. */
2331 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2332 PGM_INVL_PG(pVCpu, GCPtrPage);
2333 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2334 }
2335# endif
2336 }
2337 }
2338 else
2339 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2340 }
2341
2342 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2343}
2344
2345#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
2346
2347/**
2348 * Sync a shadow page table.
2349 *
2350 * The shadow page table is not present in the shadow PDE.
2351 *
2352 * Handles mapping conflicts.
2353 *
2354 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2355 * conflict), and Trap0eHandler.
2356 *
2357 * A precondition for this method is that the shadow PDE is not present. The
2358 * caller must take the PGM lock before checking this and continue to hold it
2359 * when calling this method.
2360 *
2361 * @returns VBox status code.
2362 * @param pVCpu The cross context virtual CPU structure.
2363 * @param iPDSrc Page directory index.
2364 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2365 * Assume this is a temporary mapping.
2366 * @param GCPtrPage GC Pointer of the page that caused the fault
2367 */
2368static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2369{
2370 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2371 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2372
2373#if 0 /* rarely useful; leave for debugging. */
2374 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2375#endif
2376 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2377
2378 PGM_LOCK_ASSERT_OWNER(pVM);
2379
2380#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2381 || PGM_GST_TYPE == PGM_TYPE_PAE \
2382 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2383 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
2384 && PGM_SHW_TYPE != PGM_TYPE_NONE
2385 int rc = VINF_SUCCESS;
2386
2387 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2388
2389 /*
2390 * Some input validation first.
2391 */
2392 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2393
2394 /*
2395 * Get the relevant shadow PDE entry.
2396 */
2397# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2398 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2399 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2400
2401 /* Fetch the pgm pool shadow descriptor. */
2402 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2403 Assert(pShwPde);
2404
2405# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2406 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2407 PPGMPOOLPAGE pShwPde = NULL;
2408 PX86PDPAE pPDDst;
2409 PSHWPDE pPdeDst;
2410
2411 /* Fetch the pgm pool shadow descriptor. */
2412 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2413 AssertRCSuccessReturn(rc, rc);
2414 Assert(pShwPde);
2415
2416 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2417 pPdeDst = &pPDDst->a[iPDDst];
2418
2419# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2420 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2421 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2422 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2423 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2424 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2425 AssertRCSuccessReturn(rc, rc);
2426 Assert(pPDDst);
2427 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2428
2429# endif
2430 SHWPDE PdeDst = *pPdeDst;
2431
2432# if PGM_GST_TYPE == PGM_TYPE_AMD64
2433 /* Fetch the pgm pool shadow descriptor. */
2434 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2435 Assert(pShwPde);
2436# endif
2437
2438 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P.*/
2439
2440 /*
2441 * Sync the page directory entry.
2442 */
2443 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2444 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
2445 if ( (PdeSrc.u & X86_PDE_P)
2446 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2447 {
2448 /*
2449 * Allocate & map the page table.
2450 */
2451 PSHWPT pPTDst;
2452 PPGMPOOLPAGE pShwPage;
2453 RTGCPHYS GCPhys;
2454 if (fPageTable)
2455 {
2456 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2457# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2458 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2459 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (PAGE_SIZE / 2)));
2460# endif
2461 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
2462 pShwPde->idx, iPDDst, false /*fLockPage*/,
2463 &pShwPage);
2464 }
2465 else
2466 {
2467 PGMPOOLACCESS enmAccess;
2468# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2469 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
2470# else
2471 const bool fNoExecute = false;
2472# endif
2473
2474 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2475# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2476 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2477 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2478# endif
2479 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2480 if (PdeSrc.u & X86_PDE_US)
2481 {
2482 if (PdeSrc.u & X86_PDE_RW)
2483 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2484 else
2485 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2486 }
2487 else
2488 {
2489 if (PdeSrc.u & X86_PDE_RW)
2490 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2491 else
2492 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2493 }
2494 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
2495 pShwPde->idx, iPDDst, false /*fLockPage*/,
2496 &pShwPage);
2497 }
2498 if (rc == VINF_SUCCESS)
2499 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2500 else if (rc == VINF_PGM_CACHED_PAGE)
2501 {
2502 /*
2503 * The PT was cached, just hook it up.
2504 */
2505 if (fPageTable)
2506 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2507 else
2508 {
2509 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2510 /* (see explanation and assumptions further down.) */
2511 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2512 {
2513 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2514 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2515 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2516 }
2517 }
2518 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2519 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2520 return VINF_SUCCESS;
2521 }
2522 else
2523 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2524 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2525 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2526 * irrelevant at this point. */
2527 PdeDst.u &= X86_PDE_AVL_MASK;
2528 PdeDst.u |= pShwPage->Core.Key;
2529
2530 /*
2531 * Page directory has been accessed (this is a fault situation, remember).
2532 */
2533 /** @todo
2534 * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2535 * fault situation. What's more, the Trap0eHandler has already set the
2536 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2537 * might need setting the accessed flag.
2538 *
2539 * The best idea is to leave this change to the caller and add an
2540 * assertion that it's set already. */
2541 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
2542 if (fPageTable)
2543 {
2544 /*
2545 * Page table - 4KB.
2546 *
2547 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2548 */
2549 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2550 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
2551 PGSTPT pPTSrc;
2552 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2553 if (RT_SUCCESS(rc))
2554 {
2555 /*
2556 * Start by syncing the page directory entry so CSAM's TLB trick works.
2557 */
2558 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2559 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2560 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2561 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2562
2563 /*
2564 * Directory/page user or supervisor privilege: (same goes for read/write)
2565 *
2566 * Directory Page Combined
2567 * U/S U/S U/S
2568 * 0 0 0
2569 * 0 1 0
2570 * 1 0 0
2571 * 1 1 1
2572 *
2573 * Simple AND operation. Table listed for completeness.
2574 *
2575 */
2576 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
2577# ifdef PGM_SYNC_N_PAGES
2578 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2579 unsigned iPTDst = iPTBase;
2580 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2581 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2582 iPTDst = 0;
2583 else
2584 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2585# else /* !PGM_SYNC_N_PAGES */
2586 unsigned iPTDst = 0;
2587 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2588# endif /* !PGM_SYNC_N_PAGES */
2589 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2590 | ((RTGCPTR)iPTDst << PAGE_SHIFT);
2591# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2592 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2593 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2594# else
2595 const unsigned offPTSrc = 0;
2596# endif
2597 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += PAGE_SIZE)
2598 {
2599 const unsigned iPTSrc = iPTDst + offPTSrc;
2600 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2601 if (PteSrc.u & X86_PTE_P)
2602 {
2603 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2604 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2605 GCPtrCur,
2606 PteSrc.u & X86_PTE_P,
2607 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2608 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2609 (uint64_t)PteSrc.u,
2610 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2611 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2612 }
2613 /* else: the page table was cleared by the pool */
2614 } /* for PTEs */
2615 }
2616 }
2617 else
2618 {
2619 /*
2620 * Big page - 2/4MB.
2621 *
2622 * We'll walk the ram range list in parallel and optimize lookups.
2623 * We will only sync one shadow page table at a time.
2624 */
2625 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
2626
2627 /**
2628 * @todo It might be more efficient to sync only a part of the 4MB
2629 * page (similar to what we do for 4KB PDs).
2630 */
2631
2632 /*
2633 * Start by syncing the page directory entry.
2634 */
2635 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2636 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2637
2638 /*
2639 * If the page is not flagged as dirty and is writable, then make it read-only
2640 * at PD level, so we can set the dirty bit when the page is modified.
2641 *
2642 * ASSUMES that page access handlers are implemented on page table entry level.
2643 * Thus we will first catch the dirty access and set PDE.D and restart. If
2644 * there is an access handler, we'll trap again and let it work on the problem.
2645 */
2646 /** @todo move the above stuff to a section in the PGM documentation. */
2647 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2648 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2649 {
2650 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2651 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2652 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2653 }
2654 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2655 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2656
2657 /*
2658 * Fill the shadow page table.
2659 */
2660 /* Get address and flags from the source PDE. */
2661 SHWPTE PteDstBase;
2662 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2663
2664 /* Loop thru the entries in the shadow PT. */
2665 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2666 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2667 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
2668 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2669 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2670 unsigned iPTDst = 0;
2671 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2672 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2673 {
2674 if (pRam && GCPhys >= pRam->GCPhys)
2675 {
2676# ifndef PGM_WITH_A20
2677 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2678# endif
2679 do
2680 {
2681 /* Make shadow PTE. */
2682# ifdef PGM_WITH_A20
2683 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2684# else
2685 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2686# endif
2687 SHWPTE PteDst;
2688
2689# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2690 /* Try to make the page writable if necessary. */
2691 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2692 && ( PGM_PAGE_IS_ZERO(pPage)
2693 || ( SHW_PTE_IS_RW(PteDstBase)
2694 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2695# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2696 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2697# endif
2698# ifdef VBOX_WITH_PAGE_SHARING
2699 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2700# endif
2701 && !PGM_PAGE_IS_BALLOONED(pPage))
2702 )
2703 )
2704 {
2705 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2706 AssertRCReturn(rc, rc);
2707 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2708 break;
2709 }
2710# endif
2711
2712 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2713 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2714 else if (PGM_PAGE_IS_BALLOONED(pPage))
2715 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2716 else
2717 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2718
2719 /* Only map writable pages writable. */
2720 if ( SHW_PTE_IS_P_RW(PteDst)
2721 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2722 {
2723 /* Still applies to shared pages. */
2724 Assert(!PGM_PAGE_IS_ZERO(pPage));
2725 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2726 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2727 }
2728
2729 if (SHW_PTE_IS_P(PteDst))
2730 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2731
2732 /* commit it (not atomic, new table) */
2733 pPTDst->a[iPTDst] = PteDst;
2734 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2735 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2736 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2737
2738 /* advance */
2739 GCPhys += PAGE_SIZE;
2740 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2741# ifndef PGM_WITH_A20
2742 iHCPage++;
2743# endif
2744 iPTDst++;
2745 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2746 && GCPhys <= pRam->GCPhysLast);
2747
2748 /* Advance ram range list. */
2749 while (pRam && GCPhys > pRam->GCPhysLast)
2750 pRam = pRam->CTX_SUFF(pNext);
2751 }
2752 else if (pRam)
2753 {
2754 Log(("Invalid pages at %RGp\n", GCPhys));
2755 do
2756 {
2757 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2758 GCPhys += PAGE_SIZE;
2759 iPTDst++;
2760 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2761 && GCPhys < pRam->GCPhys);
2762 PGM_A20_APPLY_TO_VAR(pVCpu,GCPhys);
2763 }
2764 else
2765 {
2766 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2767 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2768 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2769 }
2770 } /* while more PTEs */
2771 } /* 4KB / 4MB */
2772 }
2773 else
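        /* The guest PDE is not present or not valid: nothing to sync, the shadow PDE simply
           stays not present and any access will fault again. */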
2774 AssertRelease(!SHW_PDE_IS_P(PdeDst));
2775
2776 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2777 if (RT_FAILURE(rc))
2778 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
2779 return rc;
2780
2781#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2782 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2783 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2784 && PGM_SHW_TYPE != PGM_TYPE_NONE
2785 NOREF(iPDSrc); NOREF(pPDSrc);
2786
2787 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2788
2789 /*
2790 * Validate input a little bit.
2791 */
2792 int rc = VINF_SUCCESS;
2793# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2794 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2795 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2796
2797 /* Fetch the pgm pool shadow descriptor. */
2798 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2799 Assert(pShwPde);
2800
2801# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2802 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2803 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2804 PX86PDPAE pPDDst;
2805 PSHWPDE pPdeDst;
2806
2807 /* Fetch the pgm pool shadow descriptor. */
2808 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2809 AssertRCSuccessReturn(rc, rc);
2810 Assert(pShwPde);
2811
2812 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2813 pPdeDst = &pPDDst->a[iPDDst];
2814
2815# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2816 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2817 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2818 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2819 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
2820 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2821 AssertRCSuccessReturn(rc, rc);
2822 Assert(pPDDst);
2823 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2824
2825 /* Fetch the pgm pool shadow descriptor. */
2826 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2827 Assert(pShwPde);
2828
2829# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2830 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2831 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2832 PEPTPD pPDDst;
2833 PEPTPDPT pPdptDst;
2834
2835 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2836 if (rc != VINF_SUCCESS)
2837 {
2838 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2839 AssertRC(rc);
2840 return rc;
2841 }
2842 Assert(pPDDst);
2843 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2844
2845 /* Fetch the pgm pool shadow descriptor. */
2846 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
2847 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2848 Assert(pShwPde);
2849# endif
2850 SHWPDE PdeDst = *pPdeDst;
2851
2852 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2853
2854# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2855 if ( BTH_IS_NP_ACTIVE(pVM)
2856 && !VM_IS_NEM_ENABLED(pVM)) /** @todo NEM: Large page support. */
2857 {
2858 /* Check if we allocated a big page before for this 2 MB range. */
2859 PPGMPAGE pPage;
2860 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
2861 if (RT_SUCCESS(rc))
2862 {
2863 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2864 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
2865 {
2866 if (PGM_A20_IS_ENABLED(pVCpu))
2867 {
2868 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
2869 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2870 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2871 }
2872 else
2873 {
2874 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
2875 pVM->pgm.s.cLargePagesDisabled++;
2876 }
2877 }
2878 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
2879 && PGM_A20_IS_ENABLED(pVCpu))
2880 {
2881 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
2882 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
2883 if (RT_SUCCESS(rc))
2884 {
2885 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2886 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2887 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2888 }
2889 }
2890 else if ( PGMIsUsingLargePages(pVM)
2891 && PGM_A20_IS_ENABLED(pVCpu))
2892 {
2893 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
2894 if (RT_SUCCESS(rc))
2895 {
2896 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2897 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2898 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2899 }
2900 else
2901 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
2902 }
2903
2904 if (HCPhys != NIL_RTHCPHYS)
2905 {
2906# if PGM_SHW_TYPE == PGM_TYPE_EPT
2907 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_TYPE_WB
2908 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
2909# else
2910 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
2911 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
2912# endif
2913 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2914
2915 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
2916 /* Add a reference to the first page only. */
2917 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
2918
2919 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2920 return VINF_SUCCESS;
2921 }
2922 }
2923 }
2924# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
2925
2926 /*
2927 * Allocate & map the page table.
2928 */
2929 PSHWPT pPTDst;
2930 PPGMPOOLPAGE pShwPage;
2931 RTGCPHYS GCPhys;
2932
2933 /* Virtual address = physical address */
2934 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
2935 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
2936 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
2937 &pShwPage);
2938 if ( rc == VINF_SUCCESS
2939 || rc == VINF_PGM_CACHED_PAGE)
2940 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2941 else
2942 {
2943 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2944 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2945 }
2946
2947 if (rc == VINF_SUCCESS)
2948 {
2949 /* New page table; fully set it up. */
2950 Assert(pPTDst);
2951
2952 /* Mask away the page offset. */
2953 GCPtrPage &= ~(RTGCPTR)PAGE_OFFSET_MASK;
2954
2955 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2956 {
2957 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2958 | (iPTDst << PAGE_SHIFT));
2959
2960 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2961 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2962 GCPtrCurPage,
2963 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2964 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2965
2966 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2967 break;
2968 }
2969 }
2970 else
2971 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
2972
2973 /* Save the new PDE. */
2974# if PGM_SHW_TYPE == PGM_TYPE_EPT
2975 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
2976 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
2977# else
2978 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
2979 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
2980# endif
2981 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2982
2983 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2984 if (RT_FAILURE(rc))
2985 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
2986 return rc;
2987
2988#else
2989 NOREF(iPDSrc); NOREF(pPDSrc);
2990 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2991 return VERR_PGM_NOT_USED_IN_MODE;
2992#endif
2993}
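
The large-page branch above decides whether the whole 2 MB range containing GCPtrPage can be mapped by a single leaf PDE instead of allocating a shadow page table. As a rough stand-alone illustration of the arithmetic involved (plain C with made-up addresses and the architectural EPT bit layout, not the VBox X86_PDE2M_PAE_PG_MASK / EPT_E_* macros):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t cb2M      = UINT64_C(1) << 21;            /* 2 MB large-page size */
    const uint64_t fMask2M   = ~(cb2M - 1);                  /* mask down to the 2 MB boundary */
    uint64_t       GCPtrPage = UINT64_C(0x00000000dead5000); /* hypothetical faulting address */
    uint64_t       HCPhys2M  = UINT64_C(0x0000000123400000); /* hypothetical 2 MB aligned host page */

    /* EPT leaf PDE (Intel SDM): R=bit 0, W=bit 1, X=bit 2, memory type=bits 5:3, ignore PAT=bit 6, large=bit 7. */
    uint64_t uPde = HCPhys2M
                  | UINT64_C(1) << 0      /* read */
                  | UINT64_C(1) << 1      /* write */
                  | UINT64_C(1) << 2      /* execute */
                  | UINT64_C(6) << 3      /* memory type: write-back */
                  | UINT64_C(1) << 6      /* ignore PAT */
                  | UINT64_C(1) << 7;     /* large/leaf entry */

    printf("2 MB base of %#llx is %#llx, leaf PDE=%#llx\n",
           (unsigned long long)GCPtrPage,
           (unsigned long long)(GCPtrPage & fMask2M),
           (unsigned long long)uPde);
    return 0;
}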
2994
2995
2996
2997/**
2998 * Prefetch a page/set of pages.
2999 *
3000 * Typically used to sync commonly used pages before entering raw mode
3001 * after a CR3 reload.
3002 *
3003 * @returns VBox status code.
3004 * @param pVCpu The cross context virtual CPU structure.
3005 * @param GCPtrPage Page to prefetch.
3006 */
3007PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
3008{
3009#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3010 || PGM_GST_TYPE == PGM_TYPE_REAL \
3011 || PGM_GST_TYPE == PGM_TYPE_PROT \
3012 || PGM_GST_TYPE == PGM_TYPE_PAE \
3013 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3014 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3015 && PGM_SHW_TYPE != PGM_TYPE_NONE
3016 /*
3017 * Check that all Guest levels through the PDE are present, getting the
3018 * PD and PDE in the process.
3019 */
3020 int rc = VINF_SUCCESS;
3021# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3022# if PGM_GST_TYPE == PGM_TYPE_32BIT
3023 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3024 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3025# elif PGM_GST_TYPE == PGM_TYPE_PAE
3026 unsigned iPDSrc;
3027 X86PDPE PdpeSrc;
3028 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3029 if (!pPDSrc)
3030 return VINF_SUCCESS; /* not present */
3031# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3032 unsigned iPDSrc;
3033 PX86PML4E pPml4eSrc;
3034 X86PDPE PdpeSrc;
3035 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3036 if (!pPDSrc)
3037 return VINF_SUCCESS; /* not present */
3038# endif
3039 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3040# else
3041 PGSTPD pPDSrc = NULL;
3042 const unsigned iPDSrc = 0;
3043 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3044# endif
3045
3046 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
3047 {
3048 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3049 PGM_LOCK_VOID(pVM);
3050
3051# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3052 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3053# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3054 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3055 PX86PDPAE pPDDst;
3056 X86PDEPAE PdeDst;
3057# if PGM_GST_TYPE != PGM_TYPE_PAE
3058 X86PDPE PdpeSrc;
3059
3060 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3061 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3062# endif
3063 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3064 if (rc != VINF_SUCCESS)
3065 {
3066 PGM_UNLOCK(pVM);
3067 AssertRC(rc);
3068 return rc;
3069 }
3070 Assert(pPDDst);
3071 PdeDst = pPDDst->a[iPDDst];
3072
3073# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3074 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3075 PX86PDPAE pPDDst;
3076 X86PDEPAE PdeDst;
3077
3078# if PGM_GST_TYPE == PGM_TYPE_PROT
3079 /* AMD-V nested paging */
3080 X86PML4E Pml4eSrc;
3081 X86PDPE PdpeSrc;
3082 PX86PML4E pPml4eSrc = &Pml4eSrc;
3083
3084 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3085 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3086 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3087# endif
3088
3089 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3090 if (rc != VINF_SUCCESS)
3091 {
3092 PGM_UNLOCK(pVM);
3093 AssertRC(rc);
3094 return rc;
3095 }
3096 Assert(pPDDst);
3097 PdeDst = pPDDst->a[iPDDst];
3098# endif
3099 if (!(PdeDst.u & X86_PDE_P))
3100 {
3101 /** @todo r=bird: This guy will set the A bit on the PDE,
3102 * probably harmless. */
3103 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3104 }
3105 else
3106 {
3107 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3108 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3109 * makes no sense to prefetch more than one page.
3110 */
3111 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3112 if (RT_SUCCESS(rc))
3113 rc = VINF_SUCCESS;
3114 }
3115 PGM_UNLOCK(pVM);
3116 }
3117 return rc;
3118
3119#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3120 NOREF(pVCpu); NOREF(GCPtrPage);
3121 return VINF_SUCCESS; /* ignore */
3122#else
3123 AssertCompile(0);
3124#endif
3125}
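
PrefetchPage walks the guest structures down to the PDE (PML4E and PDPTE for AMD64, PDPTE for PAE, just the PD for 32-bit guests) before looking at the shadow side, and only syncs a single page. The index arithmetic behind those walks is the standard x86 one; a minimal stand-alone sketch using the architectural shift values (rather than the GST_*/SHW_* macros, and with a made-up address):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t GCPtr = UINT64_C(0x00007f12345678ab);          /* hypothetical guest-virtual address */

    /* Long mode / PAE style: up to four levels of 512 entries, 9 bits per level. */
    unsigned iPml4 = (unsigned)((GCPtr >> 39) & 0x1ff);
    unsigned iPdpt = (unsigned)((GCPtr >> 30) & 0x1ff);
    unsigned iPd   = (unsigned)((GCPtr >> 21) & 0x1ff);     /* each PDE covers 2 MB */
    unsigned iPt   = (unsigned)((GCPtr >> 12) & 0x1ff);     /* each PTE covers 4 KB */
    unsigned off   = (unsigned)(GCPtr & 0xfff);

    /* 32-bit non-PAE guests use two levels of 1024 entries, 10 bits per level. */
    uint32_t GCPtr32 = UINT32_C(0xdeadbeef);
    unsigned iPd32   = (GCPtr32 >> 22) & 0x3ff;             /* each PDE covers 4 MB */
    unsigned iPt32   = (GCPtr32 >> 12) & 0x3ff;

    printf("64-bit: pml4=%u pdpt=%u pd=%u pt=%u off=%#x\n", iPml4, iPdpt, iPd, iPt, off);
    printf("32-bit: pd=%u pt=%u\n", iPd32, iPt32);
    return 0;
}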
3126
3127
3128
3129
3130/**
3131 * Syncs a page during a PGMVerifyAccess() call.
3132 *
3133 * @returns VBox status code (informational status codes included).
3134 * @param pVCpu The cross context virtual CPU structure.
3135 * @param GCPtrPage The address of the page to sync.
3136 * @param fPage The effective guest page flags.
3137 * @param uErr The trap error code.
3138 * @remarks This will normally never be called on invalid guest page
3139 * translation entries.
3140 */
3141PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3142{
3143 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3144
3145 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3146 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
3147
3148 Assert(!pVM->pgm.s.fNestedPaging);
3149#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3150 || PGM_GST_TYPE == PGM_TYPE_REAL \
3151 || PGM_GST_TYPE == PGM_TYPE_PROT \
3152 || PGM_GST_TYPE == PGM_TYPE_PAE \
3153 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3154 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3155 && PGM_SHW_TYPE != PGM_TYPE_NONE
3156
3157 /*
3158 * Get guest PD and index.
3159 */
3160 /** @todo Performance: We've done all this a jiffy ago in the
3161 * PGMGstGetPage call. */
3162# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3163# if PGM_GST_TYPE == PGM_TYPE_32BIT
3164 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3165 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3166
3167# elif PGM_GST_TYPE == PGM_TYPE_PAE
3168 unsigned iPDSrc = 0;
3169 X86PDPE PdpeSrc;
3170 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3171 if (RT_UNLIKELY(!pPDSrc))
3172 {
3173 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3174 return VINF_EM_RAW_GUEST_TRAP;
3175 }
3176
3177# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3178 unsigned iPDSrc = 0; /* shut up gcc */
3179 PX86PML4E pPml4eSrc = NULL; /* ditto */
3180 X86PDPE PdpeSrc;
3181 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3182 if (RT_UNLIKELY(!pPDSrc))
3183 {
3184 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3185 return VINF_EM_RAW_GUEST_TRAP;
3186 }
3187# endif
3188
3189# else /* !PGM_WITH_PAGING */
3190 PGSTPD pPDSrc = NULL;
3191 const unsigned iPDSrc = 0;
3192# endif /* !PGM_WITH_PAGING */
3193 int rc = VINF_SUCCESS;
3194
3195 PGM_LOCK_VOID(pVM);
3196
3197 /*
3198 * First check if the shadow pd is present.
3199 */
3200# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3201 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3202
3203# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3204 PX86PDEPAE pPdeDst;
3205 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3206 PX86PDPAE pPDDst;
3207# if PGM_GST_TYPE != PGM_TYPE_PAE
3208 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3209 X86PDPE PdpeSrc;
3210 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3211# endif
3212 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3213 if (rc != VINF_SUCCESS)
3214 {
3215 PGM_UNLOCK(pVM);
3216 AssertRC(rc);
3217 return rc;
3218 }
3219 Assert(pPDDst);
3220 pPdeDst = &pPDDst->a[iPDDst];
3221
3222# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3223 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3224 PX86PDPAE pPDDst;
3225 PX86PDEPAE pPdeDst;
3226
3227# if PGM_GST_TYPE == PGM_TYPE_PROT
3228 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3229 X86PML4E Pml4eSrc;
3230 X86PDPE PdpeSrc;
3231 PX86PML4E pPml4eSrc = &Pml4eSrc;
3232 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3233 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3234# endif
3235
3236 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3237 if (rc != VINF_SUCCESS)
3238 {
3239 PGM_UNLOCK(pVM);
3240 AssertRC(rc);
3241 return rc;
3242 }
3243 Assert(pPDDst);
3244 pPdeDst = &pPDDst->a[iPDDst];
3245# endif
3246
3247 if (!(pPdeDst->u & X86_PDE_P))
3248 {
3249 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3250 if (rc != VINF_SUCCESS)
3251 {
3252 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3253 PGM_UNLOCK(pVM);
3254 AssertRC(rc);
3255 return rc;
3256 }
3257 }
3258
3259# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3260 /* Check for dirty bit fault */
3261 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3262 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3263 Log(("PGMVerifyAccess: success (dirty)\n"));
3264 else
3265# endif
3266 {
3267# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3268 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3269# else
3270 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3271# endif
3272
3273 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3274 if (uErr & X86_TRAP_PF_US)
3275 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
3276 else /* supervisor */
3277 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3278
3279 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3280 if (RT_SUCCESS(rc))
3281 {
3282 /* Page was successfully synced */
3283 Log2(("PGMVerifyAccess: success (sync)\n"));
3284 rc = VINF_SUCCESS;
3285 }
3286 else
3287 {
3288 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3289 rc = VINF_EM_RAW_GUEST_TRAP;
3290 }
3291 }
3292 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3293 PGM_UNLOCK(pVM);
3294 return rc;
3295
3296#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3297
3298 AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3299 return VERR_PGM_NOT_USED_IN_MODE;
3300#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3301}
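
The uErr value consulted above (X86_TRAP_PF_US and friends) follows the architectural #PF error-code layout, which is what lets the function split its statistics into user and supervisor out-of-sync counts. A minimal stand-alone decoder of that layout, with a made-up sample value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t uErr = 0x07;   /* hypothetical: protection violation + write + user-mode */

    printf("P    = %u (0 = page not present, 1 = protection violation)\n",  uErr        & 1u);
    printf("W/R  = %u (1 = write access)\n",                               (uErr >> 1)  & 1u);
    printf("U/S  = %u (1 = user-mode access)\n",                           (uErr >> 2)  & 1u);
    printf("RSVD = %u (1 = reserved bit set in a paging entry)\n",         (uErr >> 3)  & 1u);
    printf("I/D  = %u (1 = instruction fetch)\n",                          (uErr >> 4)  & 1u);
    return 0;
}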
3302
3303
3304/**
3305 * Syncs the paging hierarchy starting at CR3.
3306 *
3307 * @returns VBox status code, R0/RC may return VINF_PGM_SYNC_CR3, no other
3308 * informational status codes.
3309 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
3310 * the VMM into guest context.
3311 * @param pVCpu The cross context virtual CPU structure.
3312 * @param cr0 Guest context CR0 register.
3313 * @param cr3 Guest context CR3 register. Not subjected to the A20
3314 * mask.
3315 * @param cr4 Guest context CR4 register.
3316 * @param fGlobal Whether to include global page directories.
3317 */
3318PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3319{
3320 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3321 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
3322
3323 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
3324
3325#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3326# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3327 PGM_LOCK_VOID(pVM);
3328 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3329 if (pPool->cDirtyPages)
3330 pgmPoolResetDirtyPages(pVM);
3331 PGM_UNLOCK(pVM);
3332# endif
3333#endif /* !NESTED && !EPT */
3334
3335#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3336 /*
3337 * Nested / EPT / None - No work.
3338 */
3339 return VINF_SUCCESS;
3340
3341#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3342 /*
3343 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3344 * out the shadow parts when the guest modifies its tables.
3345 */
3346 return VINF_SUCCESS;
3347
3348#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3349
3350 return VINF_SUCCESS;
3351#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3352}
3353
3354
3355
3356
3357#ifdef VBOX_STRICT
3358
3359/**
3360 * Checks that the shadow page table is in sync with the guest one.
3361 *
3362 * @returns The number of errors.
3363 * @param pVCpu The cross context virtual CPU structure.
3364 * @param cr3 Guest context CR3 register.
3365 * @param cr4 Guest context CR4 register.
3366 * @param GCPtr Where to start. Defaults to 0.
3367 * @param cb How much to check. Defaults to everything.
3368 */
3369PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3370{
3371 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
3372#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3373 return 0;
3374#else
3375 unsigned cErrors = 0;
3376 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3377 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3378
3379# if PGM_GST_TYPE == PGM_TYPE_PAE
3380 /** @todo currently broken; crashes below somewhere */
3381 AssertFailed();
3382# endif
3383
3384# if PGM_GST_TYPE == PGM_TYPE_32BIT \
3385 || PGM_GST_TYPE == PGM_TYPE_PAE \
3386 || PGM_GST_TYPE == PGM_TYPE_AMD64
3387
3388 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3389 PPGMCPU pPGM = &pVCpu->pgm.s;
3390 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3391 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3392# ifndef IN_RING0
3393 RTHCPHYS HCPhys; /* general usage. */
3394# endif
3395 int rc;
3396
3397 /*
3398 * Check that the Guest CR3 and all its mappings are correct.
3399 */
3400 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
3401 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3402 false);
3403# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3404# if 0
3405# if PGM_GST_TYPE == PGM_TYPE_32BIT
3406 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3407# else
3408 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3409# endif
3410 AssertRCReturn(rc, 1);
3411 HCPhys = NIL_RTHCPHYS;
3412 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
3413 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3414# endif
3415# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3416 pgmGstGet32bitPDPtr(pVCpu);
3417 RTGCPHYS GCPhys;
3418 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
3419 AssertRCReturn(rc, 1);
3420 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3421# endif
3422# endif /* !IN_RING0 */
3423
3424 /*
3425 * Get and check the Shadow CR3.
3426 */
3427# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3428 unsigned cPDEs = X86_PG_ENTRIES;
3429 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3430# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3431# if PGM_GST_TYPE == PGM_TYPE_32BIT
3432 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3433# else
3434 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3435# endif
3436 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3437# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3438 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3439 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3440# endif
3441 if (cb != ~(RTGCPTR)0)
3442 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3443
3444/** @todo call the other two PGMAssert*() functions. */
3445
3446# if PGM_GST_TYPE == PGM_TYPE_AMD64
3447 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3448
3449 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3450 {
3451 PPGMPOOLPAGE pShwPdpt = NULL;
3452 PX86PML4E pPml4eSrc;
3453 PX86PML4E pPml4eDst;
3454 RTGCPHYS GCPhysPdptSrc;
3455
3456 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3457 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3458
3459 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3460 if (!(pPml4eDst->u & X86_PML4E_P))
3461 {
3462 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3463 continue;
3464 }
3465
3466 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3467 GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
3468
3469 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
3470 {
3471 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3472 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3473 cErrors++;
3474 continue;
3475 }
3476
3477 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3478 {
3479 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3480 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3481 cErrors++;
3482 continue;
3483 }
3484
3485 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
3486 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
3487 {
3488 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3489 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3490 cErrors++;
3491 continue;
3492 }
3493# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3494 {
3495# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3496
3497# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3498 /*
3499 * Check the PDPTEs too.
3500 */
3501 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3502
3503 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3504 {
3505 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3506 PPGMPOOLPAGE pShwPde = NULL;
3507 PX86PDPE pPdpeDst;
3508 RTGCPHYS GCPhysPdeSrc;
3509 X86PDPE PdpeSrc;
3510 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3511# if PGM_GST_TYPE == PGM_TYPE_PAE
3512 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3513 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3514# else
3515 PX86PML4E pPml4eSrcIgn;
3516 PX86PDPT pPdptDst;
3517 PX86PDPAE pPDDst;
3518 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3519
3520 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3521 if (rc != VINF_SUCCESS)
3522 {
3523 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3524 GCPtr += 512 * _2M;
3525 continue; /* next PDPTE */
3526 }
3527 Assert(pPDDst);
3528# endif
3529 Assert(iPDSrc == 0);
3530
3531 pPdpeDst = &pPdptDst->a[iPdpt];
3532
3533 if (!(pPdpeDst->u & X86_PDPE_P))
3534 {
3535 GCPtr += 512 * _2M;
3536 continue; /* next PDPTE */
3537 }
3538
3539 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3540 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
3541
3542 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
3543 {
3544 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3545 GCPtr += 512 * _2M;
3546 cErrors++;
3547 continue;
3548 }
3549
3550 if (GCPhysPdeSrc != pShwPde->GCPhys)
3551 {
3552# if PGM_GST_TYPE == PGM_TYPE_AMD64
3553 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3554# else
3555 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3556# endif
3557 GCPtr += 512 * _2M;
3558 cErrors++;
3559 continue;
3560 }
3561
3562# if PGM_GST_TYPE == PGM_TYPE_AMD64
3563 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
3564 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
3565 {
3566 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3567 GCPtr += 512 * _2M;
3568 cErrors++;
3569 continue;
3570 }
3571# endif
3572
3573# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3574 {
3575# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3576# if PGM_GST_TYPE == PGM_TYPE_32BIT
3577 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3578# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3579 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3580# endif
3581# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3582 /*
3583 * Iterate the shadow page directory.
3584 */
3585 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3586 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3587
3588 for (;
3589 iPDDst < cPDEs;
3590 iPDDst++, GCPtr += cIncrement)
3591 {
3592# if PGM_SHW_TYPE == PGM_TYPE_PAE
3593 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3594# else
3595 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3596# endif
3597 if ( (PdeDst.u & X86_PDE_P)
3598 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
3599 {
3600 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3601 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3602 if (!pPoolPage)
3603 {
3604 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3605 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3606 cErrors++;
3607 continue;
3608 }
3609 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3610
3611 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3612 {
3613 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3614 GCPtr, (uint64_t)PdeDst.u));
3615 cErrors++;
3616 }
3617
3618 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3619 {
3620 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3621 GCPtr, (uint64_t)PdeDst.u));
3622 cErrors++;
3623 }
3624
3625 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3626 if (!(PdeSrc.u & X86_PDE_P))
3627 {
3628 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3629 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3630 cErrors++;
3631 continue;
3632 }
3633
3634 if ( !(PdeSrc.u & X86_PDE_PS)
3635 || !fBigPagesSupported)
3636 {
3637 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3638# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3639 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (PAGE_SIZE / 2)));
3640# endif
3641 }
3642 else
3643 {
3644# if PGM_GST_TYPE == PGM_TYPE_32BIT
3645 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3646 {
3647 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3648 GCPtr, (uint64_t)PdeSrc.u));
3649 cErrors++;
3650 continue;
3651 }
3652# endif
3653 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3654# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3655 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
3656# endif
3657 }
3658
3659 if ( pPoolPage->enmKind
3660 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3661 {
3662 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3663 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3664 cErrors++;
3665 }
3666
3667 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3668 if (!pPhysPage)
3669 {
3670 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3671 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3672 cErrors++;
3673 continue;
3674 }
3675
3676 if (GCPhysGst != pPoolPage->GCPhys)
3677 {
3678 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3679 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3680 cErrors++;
3681 continue;
3682 }
3683
3684 if ( !(PdeSrc.u & X86_PDE_PS)
3685 || !fBigPagesSupported)
3686 {
3687 /*
3688 * Page Table.
3689 */
3690 const GSTPT *pPTSrc;
3691 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1)),
3692 &pPTSrc);
3693 if (RT_FAILURE(rc))
3694 {
3695 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3696 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3697 cErrors++;
3698 continue;
3699 }
3700 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3701 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3702 {
3703 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3704 // (This problem will go away when/if we shadow multiple CR3s.)
3705 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3706 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3707 cErrors++;
3708 continue;
3709 }
3710 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3711 {
3712 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3713 GCPtr, (uint64_t)PdeDst.u));
3714 cErrors++;
3715 continue;
3716 }
3717
3718 /* iterate the page table. */
3719# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3720 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3721 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3722# else
3723 const unsigned offPTSrc = 0;
3724# endif
3725 for (unsigned iPT = 0, off = 0;
3726 iPT < RT_ELEMENTS(pPTDst->a);
3727 iPT++, off += PAGE_SIZE)
3728 {
3729 const SHWPTE PteDst = pPTDst->a[iPT];
3730
3731 /* skip not-present and dirty tracked entries. */
3732 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3733 continue;
3734 Assert(SHW_PTE_IS_P(PteDst));
3735
3736 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3737 if (!(PteSrc.u & X86_PTE_P))
3738 {
3739# ifdef IN_RING3
3740 PGMAssertHandlerAndFlagsInSync(pVM);
3741 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
3742 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
3743 0, 0, UINT64_MAX, 99, NULL);
3744# endif
3745 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3746 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3747 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
3748 cErrors++;
3749 continue;
3750 }
3751
3752 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3753# if 1 /** @todo sync accessed bit properly... */
3754 fIgnoreFlags |= X86_PTE_A;
3755# endif
3756
3757 /* match the physical addresses */
3758 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3759 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
3760
3761# ifdef IN_RING3
3762 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3763 if (RT_FAILURE(rc))
3764 {
3765 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3766 {
3767 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3768 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3769 cErrors++;
3770 continue;
3771 }
3772 }
3773 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3774 {
3775 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3776 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3777 cErrors++;
3778 continue;
3779 }
3780# endif
3781
3782 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3783 if (!pPhysPage)
3784 {
3785# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3786 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3787 {
3788 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3789 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3790 cErrors++;
3791 continue;
3792 }
3793# endif
3794 if (SHW_PTE_IS_RW(PteDst))
3795 {
3796 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3797 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3798 cErrors++;
3799 }
3800 fIgnoreFlags |= X86_PTE_RW;
3801 }
3802 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3803 {
3804 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3805 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3806 cErrors++;
3807 continue;
3808 }
3809
3810 /* flags */
3811 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3812 {
3813 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3814 {
3815 if (SHW_PTE_IS_RW(PteDst))
3816 {
3817 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3818 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3819 cErrors++;
3820 continue;
3821 }
3822 fIgnoreFlags |= X86_PTE_RW;
3823 }
3824 else
3825 {
3826 if ( SHW_PTE_IS_P(PteDst)
3827# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3828 && !PGM_PAGE_IS_MMIO(pPhysPage)
3829# endif
3830 )
3831 {
3832 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3833 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3834 cErrors++;
3835 continue;
3836 }
3837 fIgnoreFlags |= X86_PTE_P;
3838 }
3839 }
3840 else
3841 {
3842 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
3843 {
3844 if (SHW_PTE_IS_RW(PteDst))
3845 {
3846 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3847 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3848 cErrors++;
3849 continue;
3850 }
3851 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
3852 {
3853 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3854 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3855 cErrors++;
3856 continue;
3857 }
3858 if (SHW_PTE_IS_D(PteDst))
3859 {
3860 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3861 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3862 cErrors++;
3863 }
3864# if 0 /** @todo sync access bit properly... */
3865 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3866 {
3867 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3868 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3869 cErrors++;
3870 }
3871 fIgnoreFlags |= X86_PTE_RW;
3872# else
3873 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3874# endif
3875 }
3876 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
3877 {
3878 /* access bit emulation (not implemented). */
3879 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
3880 {
3881 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3882 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3883 cErrors++;
3884 continue;
3885 }
3886 if (!SHW_PTE_IS_A(PteDst))
3887 {
3888 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3889 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3890 cErrors++;
3891 }
3892 fIgnoreFlags |= X86_PTE_P;
3893 }
3894# ifdef DEBUG_sandervl
3895 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3896# endif
3897 }
3898
3899 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
3900 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
3901 )
3902 {
3903 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3904 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
3905 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3906 cErrors++;
3907 continue;
3908 }
3909 } /* foreach PTE */
3910 }
3911 else
3912 {
3913 /*
3914 * Big Page.
3915 */
3916 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3917 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
3918 {
3919 if (PdeDst.u & X86_PDE_RW)
3920 {
3921 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3922 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3923 cErrors++;
3924 continue;
3925 }
3926 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3927 {
3928 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3929 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3930 cErrors++;
3931 continue;
3932 }
3933# if 0 /** @todo sync access bit properly... */
3934 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3935 {
3936 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3937 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3938 cErrors++;
3939 }
3940 fIgnoreFlags |= X86_PTE_RW;
3941# else
3942 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3943# endif
3944 }
3945 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3946 {
3947 /* access bit emulation (not implemented). */
3948 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
3949 {
3950 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3951 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3952 cErrors++;
3953 continue;
3954 }
3955 if (!SHW_PDE_IS_A(PdeDst))
3956 {
3957 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3958 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3959 cErrors++;
3960 }
3961 fIgnoreFlags |= X86_PTE_P;
3962 }
3963
3964 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3965 {
3966 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3967 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3968 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3969 cErrors++;
3970 }
3971
3972 /* iterate the page table. */
3973 for (unsigned iPT = 0, off = 0;
3974 iPT < RT_ELEMENTS(pPTDst->a);
3975 iPT++, off += PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + PAGE_SIZE))
3976 {
3977 const SHWPTE PteDst = pPTDst->a[iPT];
3978
3979 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
3980 {
3981 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3982 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
3983 cErrors++;
3984 }
3985
3986 /* skip not-present entries. */
3987 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
3988 continue;
3989
3990 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
3991
3992 /* match the physical addresses */
3993 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3994
3995# ifdef IN_RING3
3996 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3997 if (RT_FAILURE(rc))
3998 {
3999 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4000 {
4001 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4002 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4003 cErrors++;
4004 }
4005 }
4006 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4007 {
4008 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4009 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4010 cErrors++;
4011 continue;
4012 }
4013# endif
4014 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4015 if (!pPhysPage)
4016 {
4017# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4018 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4019 {
4020 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4021 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4022 cErrors++;
4023 continue;
4024 }
4025# endif
4026 if (SHW_PTE_IS_RW(PteDst))
4027 {
4028 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4029 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4030 cErrors++;
4031 }
4032 fIgnoreFlags |= X86_PTE_RW;
4033 }
4034 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4035 {
4036 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4037 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4038 cErrors++;
4039 continue;
4040 }
4041
4042 /* flags */
4043 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4044 {
4045 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4046 {
4047 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4048 {
4049 if (SHW_PTE_IS_RW(PteDst))
4050 {
4051 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4052 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4053 cErrors++;
4054 continue;
4055 }
4056 fIgnoreFlags |= X86_PTE_RW;
4057 }
4058 }
4059 else
4060 {
4061 if ( SHW_PTE_IS_P(PteDst)
4062# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4063 && !PGM_PAGE_IS_MMIO(pPhysPage)
4064# endif
4065 )
4066 {
4067 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4068 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4069 cErrors++;
4070 continue;
4071 }
4072 fIgnoreFlags |= X86_PTE_P;
4073 }
4074 }
4075
4076 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4077 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4078 )
4079 {
4080 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4081 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4082 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4083 cErrors++;
4084 continue;
4085 }
4086 } /* for each PTE */
4087 }
4088 }
4089 /* not present */
4090
4091 } /* for each PDE */
4092
4093 } /* for each PDPTE */
4094
4095 } /* for each PML4E */
4096
4097# ifdef DEBUG
4098 if (cErrors)
4099 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4100# endif
4101# endif /* GST is in {32BIT, PAE, AMD64} */
4102 return cErrors;
4103#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
4104}
4105#endif /* VBOX_STRICT */
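
Whenever AssertCR3 skips or finishes an entry it advances GCPtr by the span that entry covers: cIncrement per shadow PDE, 512 * _2M per PDPTE and _2M * 512 * 512 per PML4E. A quick stand-alone check of those spans, assuming 4 KB base pages:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t cbPage = 4096;                               /* assumption: 4 KB base pages */
    const uint64_t cb2M   = UINT64_C(2) * 1024 * 1024;

    uint64_t cbPde32  = 1024 * cbPage;                          /* 32-bit shadow PDE: 1024 PTEs -> 4 MB */
    uint64_t cbPdePae = 512 * cbPage;                           /* PAE/AMD64 shadow PDE: 512 PTEs -> 2 MB */
    uint64_t cbPdpte  = 512 * cb2M;                             /* one PDPTE: 512 PDEs -> 1 GB */
    uint64_t cbPml4e  = cb2M * UINT64_C(512) * UINT64_C(512);   /* one PML4E: 512 PDPTEs -> 512 GB */

    printf("PDE (32-bit): %llu MB\n", (unsigned long long)(cbPde32  >> 20));
    printf("PDE (PAE):    %llu MB\n", (unsigned long long)(cbPdePae >> 20));
    printf("PDPTE:        %llu GB\n", (unsigned long long)(cbPdpte  >> 30));
    printf("PML4E:        %llu GB\n", (unsigned long long)(cbPml4e  >> 30));
    return 0;
}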
4106
4107
4108/**
4109 * Sets up the CR3 for shadow paging
4110 *
4111 * @returns Strict VBox status code.
4112 * @retval VINF_SUCCESS.
4113 *
4114 * @param pVCpu The cross context virtual CPU structure.
4115 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
4116 * already applied.)
4117 * @param fPdpesMapped Whether the PAE PDPEs (and PDPT) have been mapped.
4118 */
4119PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3, bool fPdpesMapped)
4120{
4121 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4122 int rc = VINF_SUCCESS;
4123
4124 /* Update guest paging info. */
4125#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4126 || PGM_GST_TYPE == PGM_TYPE_PAE \
4127 || PGM_GST_TYPE == PGM_TYPE_AMD64
4128
4129 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4130 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4131
4132# if PGM_GST_TYPE == PGM_TYPE_PAE
4133 if (!fPdpesMapped)
4134# else
4135 NOREF(fPdpesMapped);
4136#endif
4137 {
4138 /*
4139 * Map the page CR3 points at.
4140 */
4141 RTHCPTR HCPtrGuestCR3;
4142 PGM_LOCK_VOID(pVM);
4143 PPGMPAGE pPageCR3 = pgmPhysGetPage(pVM, GCPhysCR3);
4144 AssertReturnStmt(pPageCR3, PGM_UNLOCK(pVM), VERR_PGM_INVALID_CR3_ADDR);
4145 /** @todo this needs some reworking wrt. locking? */
4146 rc = pgmPhysGCPhys2CCPtrInternalDepr(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4147 PGM_UNLOCK(pVM);
4148 if (RT_SUCCESS(rc))
4149 {
4150# if PGM_GST_TYPE == PGM_TYPE_32BIT
4151# ifdef IN_RING3
4152 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
4153 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
4154# else
4155 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
4156 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
4157# endif
4158
4159# elif PGM_GST_TYPE == PGM_TYPE_PAE
4160# ifdef IN_RING3
4161 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
4162 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
4163# else
4164 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
4165 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
4166# endif
4167
4168 /*
4169 * Update CPUM and map the 4 PDs too.
4170 */
4171 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
4172 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
4173 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
4174 PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
4175
4176# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4177# ifdef IN_RING3
4178 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
4179 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
4180# else
4181 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
4182 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
4183# endif
4184# endif
4185 }
4186 else
4187 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4188 }
4189#else /* prot/real stub */
4190 NOREF(fPdpesMapped);
4191#endif
4192
4193 /*
4194 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
4195 */
4196# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4197 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4198 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4199 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4200 && PGM_GST_TYPE != PGM_TYPE_PROT))
4201
4202 Assert(!pVM->pgm.s.fNestedPaging);
4203 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4204
4205 /*
4206 * Update the shadow root page as well since that's not fixed.
4207 */
4208 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4209 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4210 PPGMPOOLPAGE pNewShwPageCR3;
4211
4212 PGM_LOCK_VOID(pVM);
4213
4214# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4215 if (pPool->cDirtyPages)
4216 pgmPoolResetDirtyPages(pVM);
4217# endif
4218
4219 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4220 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
4221 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
4222 AssertFatalRC(rc2);
4223
4224 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4225# ifdef IN_RING0
4226 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4227# else
4228 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4229# endif
4230
4231 /* Set the current hypervisor CR3. */
4232 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4233
4234 /* Clean up the old CR3 root. */
4235 if ( pOldShwPageCR3
4236 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4237 {
4238 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4239
4240 /* Mark the page as unlocked; allow flushing again. */
4241 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4242
4243 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
4244 }
4245 PGM_UNLOCK(pVM);
4246# else
4247 NOREF(GCPhysCR3);
4248# endif
4249
4250 return rc;
4251}
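
For PAE guests the function above copies the four PDPTEs that CR3 points at into a local array and hands them to CPUM before the individual page directories are mapped. As a hedged stand-alone illustration of that layout (invented entry values; the real code uses X86PDPE, CPUMSetGuestPaePdpes and PGMGstMapPaePdpes): each entry is 64 bits wide, bit 0 is the present bit and bits 51:12 hold the physical address of the page directory.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Hypothetical 32-byte PDPTE block as it would sit in guest memory at CR3. */
    const uint64_t auGuestPdpt[4] = {
        UINT64_C(0x0000000001234001),   /* P=1, page directory at 0x1234000 */
        UINT64_C(0x0000000001235001),
        UINT64_C(0x0000000000000000),   /* not present */
        UINT64_C(0x0000000001237001),
    };

    uint64_t aPdpes[4];
    memcpy(aPdpes, auGuestPdpt, sizeof(aPdpes));                /* mirrors the memcpy of the guest PDPT */

    const uint64_t fPgMask = UINT64_C(0x000ffffffffff000);      /* bits 51:12 = PD physical address */
    for (unsigned i = 0; i < 4; i++)
    {
        if (aPdpes[i] & 1)                                      /* present? */
            printf("PDPTE[%u]: page directory at %#llx\n", i, (unsigned long long)(aPdpes[i] & fPgMask));
        else
            printf("PDPTE[%u]: not present\n", i);
    }
    return 0;
}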
4252
4253/**
4254 * Unmaps the shadow CR3.
4255 *
4256 * @returns VBox status, no specials.
4257 * @param pVCpu The cross context virtual CPU structure.
4258 */
4259PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
4260{
4261 LogFlow(("UnmapCR3\n"));
4262
4263 int rc = VINF_SUCCESS;
4264 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4265
4266 /*
4267 * Update guest paging info.
4268 */
4269#if PGM_GST_TYPE == PGM_TYPE_32BIT
4270 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4271 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4272
4273#elif PGM_GST_TYPE == PGM_TYPE_PAE
4274 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4275 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4276 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4277 {
4278 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4279 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4280 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4281 }
4282
4283#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4284 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4285 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4286
4287#else /* prot/real mode stub */
4288 /* nothing to do */
4289#endif
4290
4291 /*
4292 * Update shadow paging info.
4293 */
4294#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4295 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4296 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4297# if PGM_GST_TYPE != PGM_TYPE_REAL
4298 Assert(!pVM->pgm.s.fNestedPaging);
4299# endif
4300 PGM_LOCK_VOID(pVM);
4301
4302 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4303 {
4304 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4305
4306# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4307 if (pPool->cDirtyPages)
4308 pgmPoolResetDirtyPages(pVM);
4309# endif
4310
4311 /* Mark the page as unlocked; allow flushing again. */
4312 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4313
4314 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
4315 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4316 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4317 }
4318
4319 PGM_UNLOCK(pVM);
4320#endif
4321
4322 return rc;
4323}
4324