VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 93133

Last change on this file since 93133 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 189.0 KB
1/* $Id: PGMAllBth.h 93115 2022-01-01 11:31:46Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
12
13/*
14 * Copyright (C) 2006-2022 Oracle Corporation
15 *
16 * This file is part of VirtualBox Open Source Edition (OSE), as
17 * available from http://www.virtualbox.org. This file is free software;
18 * you can redistribute it and/or modify it under the terms of the GNU
19 * General Public License (GPL) as published by the Free Software
20 * Foundation, in version 2 as it comes in the "COPYING" file of the
21 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
22 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
23 */
24
25#ifdef _MSC_VER
26/** @todo we're generating unnecessary code in nested/ept shadow mode and for
27 * real/prot-guest+RC mode. */
28# pragma warning(disable: 4505)
29#endif
30
31
32/*********************************************************************************************************************************
33* Internal Functions *
34*********************************************************************************************************************************/
35RT_C_DECLS_BEGIN
36PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
37#ifndef IN_RING3
38PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
39#endif
40PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
41static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
42static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
43static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
44#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
45static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
46#else
47static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
48#endif
49PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
50PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
51PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
52#ifdef VBOX_STRICT
53PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
54#endif
55PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
56PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
57
58#ifdef IN_RING3
59PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
60#endif
61RT_C_DECLS_END
62
63
64
65
66/*
67 * Filter out some illegal combinations of guest and shadow paging, so we can
68 * remove redundant checks inside functions.
69 */
70#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
71 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
72# error "Invalid combination; PAE guest implies PAE shadow"
73#endif
74
75#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
76 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
77 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
78# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
79#endif
80
81#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
82 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
83 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
84# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
85#endif
86
87#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
88 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
89# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
90#endif
91
92
93/**
94 * Enters the shadow+guest mode.
95 *
96 * @returns VBox status code.
97 * @param pVCpu The cross context virtual CPU structure.
98 * @param GCPhysCR3 The physical address from the CR3 register.
99 */
100PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
101{
102 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches;
103 * Other modes rely on MapCR3/UnmapCR3 to setup the shadow root page tables.
104 */
105#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
106 || PGM_SHW_TYPE == PGM_TYPE_PAE \
107 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
108 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
109 || PGM_GST_TYPE == PGM_TYPE_PROT))
110
111 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
112
113 Assert((HMIsNestedPagingActive(pVM) || VM_IS_NEM_ENABLED(pVM)) == pVM->pgm.s.fNestedPaging);
114 Assert(!pVM->pgm.s.fNestedPaging);
115
116 PGM_LOCK_VOID(pVM);
117 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
118 * but any calls to GC need a proper shadow page setup as well.
119 */
120 /* Free the previous root mapping if still active. */
121 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
122 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
123 if (pOldShwPageCR3)
124 {
125 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
126
127 /* Mark the page as unlocked; allow flushing again. */
128 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
129
130 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
131 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
132 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
133 }
134
135 /* construct a fake address. */
136 GCPhysCR3 = RT_BIT_64(63);
137 PPGMPOOLPAGE pNewShwPageCR3;
138 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
139 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
140 &pNewShwPageCR3);
141 AssertRCReturn(rc, rc);
142
143 pVCpu->pgm.s.pShwPageCR3R3 = (R3PTRTYPE(PPGMPOOLPAGE))MMHyperCCToR3(pVM, pNewShwPageCR3);
144 pVCpu->pgm.s.pShwPageCR3R0 = (R0PTRTYPE(PPGMPOOLPAGE))MMHyperCCToR0(pVM, pNewShwPageCR3);
145
146 /* Mark the page as locked; disallow flushing. */
147 pgmPoolLockPage(pPool, pNewShwPageCR3);
148
149 /* Set the current hypervisor CR3. */
150 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
151
152 PGM_UNLOCK(pVM);
153 return rc;
154#else
155 NOREF(pVCpu); NOREF(GCPhysCR3);
156 return VINF_SUCCESS;
157#endif
158}
159
160
161#ifndef IN_RING3
162
163# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
164/**
165 * Deal with a guest page fault.
166 *
167 * @returns Strict VBox status code.
168 * @retval VINF_EM_RAW_GUEST_TRAP
169 * @retval VINF_EM_RAW_EMULATE_INSTR
170 *
171 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
172 * @param pWalk The guest page table walk result.
173 * @param uErr The error code.
174 */
175PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PPGMPTWALK pWalk, RTGCUINT uErr)
176{
177 /*
178 * Calc the error code for the guest trap.
179 */
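    /* Page-fault error code bits used below: P(0)=page-level protection violation
       (page was present), W/R(1)=write access, U/S(2)=user-mode access,
       RSVD(3)=reserved bit set in a paging structure, I/D(4)=instruction fetch.
       Only the bits the guest paging mode defines are forwarded to the guest;
       P and RSVD are recomputed from the walk result. */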
180 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
181 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
182 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
183 if ( pWalk->fRsvdError
184 || pWalk->fBadPhysAddr)
185 {
186 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
187 Assert(!pWalk->fNotPresent);
188 }
189 else if (!pWalk->fNotPresent)
190 uNewErr |= X86_TRAP_PF_P;
191 TRPMSetErrorCode(pVCpu, uNewErr);
192
193 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pWalk->GCPtr, uErr, pWalk->uLevel));
194 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
195 return VINF_EM_RAW_GUEST_TRAP;
196}
197# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
198
199
200#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
201/**
202 * Deal with a guest page fault.
203 *
204 * The caller has taken the PGM lock.
205 *
206 * @returns Strict VBox status code.
207 *
208 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
209 * @param uErr The error code.
210 * @param pRegFrame The register frame.
211 * @param pvFault The fault address.
212 * @param pPage The guest page at @a pvFault.
213 * @param pWalk The guest page table walk result.
214 * @param pGstWalk The guest paging-mode specific walk information.
215 * @param pfLockTaken PGM lock taken here or not (out). This is true
216 * when we're called.
217 */
218static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
219 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
220# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
221 , PPGMPTWALK pWalk
222 , PGSTPTWALK pGstWalk
223# endif
224 )
225{
226# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
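 /* No guest paging: SyncPage still expects a guest PDE argument, so hand it a
    fully permissive dummy entry (present, user, writable, accessed). */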
227 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
228# endif
229 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
230 VBOXSTRICTRC rcStrict;
231
232 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
233 {
234 /*
235 * Physical page access handler.
236 */
237# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
238 const RTGCPHYS GCPhysFault = pWalk->GCPhys;
239# else
240 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
241# endif
242 PPGMPHYSHANDLER pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
243 if (pCur)
244 {
245 PPGMPHYSHANDLERTYPEINT pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
246
247# ifdef PGM_SYNC_N_PAGES
248 /*
249 * If the region is write protected and we got a page not present fault, then sync
250 * the pages. If the fault was caused by a read, then restart the instruction.
251 * In case of write access continue to the GC write handler.
252 *
253 * ASSUMES that there is only one handler per page or that they have similar write properties.
254 */
255 if ( !(uErr & X86_TRAP_PF_P)
256 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
257 {
258# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
259 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
260# else
261 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
262# endif
263 if ( RT_FAILURE(rcStrict)
264 || !(uErr & X86_TRAP_PF_RW)
265 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
266 {
267 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
268 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
269 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
270 return rcStrict;
271 }
272 }
273# endif
274# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
275 /*
276 * If the access was not thru a #PF(RSVD|...) resync the page.
277 */
278 if ( !(uErr & X86_TRAP_PF_RSVD)
279 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
280# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
281 && (pWalk->fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK))
282 == PGM_PTATTRS_W_MASK /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
283# endif
284 )
285 {
286# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
287 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
288# else
289 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
290# endif
291 if ( RT_FAILURE(rcStrict)
292 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
293 {
294 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
295 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
296 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
297 return rcStrict;
298 }
299 }
300# endif
301
302 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
303 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
304 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
305 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
306 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
307 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
308 else
309 {
310 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
311 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
312 }
313
314 if (pCurType->CTX_SUFF(pfnPfHandler))
315 {
316 STAM_PROFILE_START(&pCur->Stat, h);
317
318 if (pCurType->fKeepPgmLock)
319 {
320 rcStrict = pCurType->CTX_SUFF(pfnPfHandler)(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault,
321 pCur->CTX_SUFF(pvUser));
322
323# ifdef VBOX_WITH_STATISTICS
324 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault); /* paranoia in case the handler deregistered itself */
325 if (pCur)
326 STAM_PROFILE_STOP(&pCur->Stat, h);
327# endif
328 }
329 else
330 {
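 /* This handler type does not require the PGM lock: copy pvUser while pCur is
    still valid, then drop the lock and tell the caller via pfLockTaken before
    calling out. */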
331 void * const pvUser = pCur->CTX_SUFF(pvUser);
332 PGM_UNLOCK(pVM);
333 *pfLockTaken = false;
334
335 rcStrict = pCurType->CTX_SUFF(pfnPfHandler)(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
336
337# ifdef VBOX_WITH_STATISTICS
338 PGM_LOCK_VOID(pVM);
339 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
340 if (pCur)
341 STAM_PROFILE_STOP(&pCur->Stat, h);
342 PGM_UNLOCK(pVM);
343# endif
344 }
345 }
346 else
347 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
348
349 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
350 return rcStrict;
351 }
352 }
353
354 /*
355 * There is a handled area of the page, but this fault doesn't belong to it.
356 * We must emulate the instruction.
357 *
358 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
359 * we first check if this was a page-not-present fault for a page with only
360 * write access handlers. Restart the instruction if it wasn't a write access.
361 */
362 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
363
364 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
365 && !(uErr & X86_TRAP_PF_P))
366 {
367# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
368 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
369# else
370 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
371# endif
372 if ( RT_FAILURE(rcStrict)
373 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
374 || !(uErr & X86_TRAP_PF_RW))
375 {
376 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
377 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
378 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
379 return rcStrict;
380 }
381 }
382
383 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
384 * It's writing to an unhandled part of the LDT page several million times.
385 */
386 rcStrict = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
387 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
388 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
389 return rcStrict;
390} /* if any kind of handler */
391# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
392
393
394/**
395 * \#PF Handler for raw-mode guest execution.
396 *
397 * @returns VBox status code (appropriate for trap handling and GC return).
398 *
399 * @param pVCpu The cross context virtual CPU structure.
400 * @param uErr The trap error code.
401 * @param pRegFrame Trap register frame.
402 * @param pvFault The fault address.
403 * @param pfLockTaken PGM lock taken here or not (out)
404 */
405PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
406{
407 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
408
409 *pfLockTaken = false;
410
411# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
412 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
413 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
414 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
415 && PGM_SHW_TYPE != PGM_TYPE_NONE
416 int rc;
417
418# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
419 /*
420 * Walk the guest page translation tables and check if it's a guest fault.
421 */
422 PGMPTWALK Walk;
423 GSTPTWALK GstWalk;
424 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &Walk, &GstWalk);
425 if (RT_FAILURE_NP(rc))
426 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
427
428 /* assert some GstWalk sanity. */
429# if PGM_GST_TYPE == PGM_TYPE_AMD64
430 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
431# endif
432# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
433 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
434# endif
435 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
436 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
437 Assert(Walk.fSucceeded);
438
439 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
440 {
441 if ( ( (uErr & X86_TRAP_PF_RW)
442 && !(Walk.fEffective & PGM_PTATTRS_W_MASK)
443 && ( (uErr & X86_TRAP_PF_US)
444 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
445 || ((uErr & X86_TRAP_PF_US) && !(Walk.fEffective & PGM_PTATTRS_US_MASK))
446 || ((uErr & X86_TRAP_PF_ID) && (Walk.fEffective & PGM_PTATTRS_NX_MASK))
447 )
448 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
449 }
450
451 /* Take the big lock now before we update flags. */
452 *pfLockTaken = true;
453 PGM_LOCK_VOID(pVM);
454
455 /*
456 * Set the accessed and dirty flags.
457 */
458 /** @todo Should probably use cmpxchg logic here as we're potentially racing
459 * other CPUs in SMP configs. (the lock isn't enough, since we take it
460 * after walking and the page tables could be stale already) */
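 /* The pattern below updates both the local walk copy and the guest structure in
    RAM; the in-memory update uses an atomic OR so that a concurrent A/D-bit
    update by another vCPU (or by the CPU itself) is not lost. */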
461# if PGM_GST_TYPE == PGM_TYPE_AMD64
462 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
463 {
464 GstWalk.Pml4e.u |= X86_PML4E_A;
465 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
466 }
467 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
468 {
469 GstWalk.Pdpe.u |= X86_PDPE_A;
470 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
471 }
472# endif
473 if (Walk.fBigPage)
474 {
475 Assert(GstWalk.Pde.u & X86_PDE_PS);
476 if (uErr & X86_TRAP_PF_RW)
477 {
478 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
479 {
480 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
481 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
482 }
483 }
484 else
485 {
486 if (!(GstWalk.Pde.u & X86_PDE4M_A))
487 {
488 GstWalk.Pde.u |= X86_PDE4M_A;
489 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
490 }
491 }
492 }
493 else
494 {
495 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
496 if (!(GstWalk.Pde.u & X86_PDE_A))
497 {
498 GstWalk.Pde.u |= X86_PDE_A;
499 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
500 }
501
502 if (uErr & X86_TRAP_PF_RW)
503 {
504# ifdef VBOX_WITH_STATISTICS
505 if (GstWalk.Pte.u & X86_PTE_D)
506 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
507 else
508 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
509# endif
510 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
511 {
512 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
513 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
514 }
515 }
516 else
517 {
518 if (!(GstWalk.Pte.u & X86_PTE_A))
519 {
520 GstWalk.Pte.u |= X86_PTE_A;
521 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
522 }
523 }
524 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
525 }
526#if 0
527 /* Disabling this since it's not reliable for SMP, see @bugref{10092#c22}. */
528 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
529 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
530#endif
531
532# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
533 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
534
535 /* Take the big lock now. */
536 *pfLockTaken = true;
537 PGM_LOCK_VOID(pVM);
538# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
539
540# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
541 /*
542 * If it is a reserved bit fault we know that it is an MMIO (access
543 * handler) related fault and can skip some 200 lines of code.
544 */
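 /* MMIO optimization: SyncHandlerPte (further down) maps pages with all-access
    handlers using reserved/invalid bits in the shadow PTE, so guest accesses to
    them fault with RSVD set and can be routed straight to the handler here. */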
545 if (uErr & X86_TRAP_PF_RSVD)
546 {
547 Assert(uErr & X86_TRAP_PF_P);
548 PPGMPAGE pPage;
549# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
550 rc = pgmPhysGetPageEx(pVM, Walk.GCPhys, &pPage);
551 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
552 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
553 pfLockTaken, &Walk, &GstWalk));
554 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
555# else
556 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
557 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
558 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
559 pfLockTaken));
560 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
561# endif
562 AssertRC(rc);
563 PGM_INVL_PG(pVCpu, pvFault);
564 return rc; /* Restart with the corrected entry. */
565 }
566# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
567
568 /*
569 * Fetch the guest PDE, PDPE and PML4E.
570 */
571# if PGM_SHW_TYPE == PGM_TYPE_32BIT
572 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
573 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
574
575# elif PGM_SHW_TYPE == PGM_TYPE_PAE
576 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
577 PX86PDPAE pPDDst;
578# if PGM_GST_TYPE == PGM_TYPE_PAE
579 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
580# else
581 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
582# endif
583 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
584
585# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
586 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
587 PX86PDPAE pPDDst;
588# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
589 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
590 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
591# else
592 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
593# endif
594 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
595
596# elif PGM_SHW_TYPE == PGM_TYPE_EPT
597 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
598 PEPTPD pPDDst;
599 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
600 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
601# endif
602 Assert(pPDDst);
603
604# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
605 /*
606 * Dirty page handling.
607 *
608 * If we successfully correct the write protection fault due to dirty bit
609 * tracking, then return immediately.
610 */
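 /* Dirty-bit tracking: shadow entries whose guest D bit is clear are kept write
    protected and tagged as track-dirty.  On the first write CheckDirtyPageFault
    sets the guest A/D bits and makes the shadow entry writable again, so the
    guest can be resumed immediately. */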
611 if (uErr & X86_TRAP_PF_RW) /* write fault? */
612 {
613 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
614 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
615 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
616 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
617 {
618 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
619 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
620 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
621 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
622 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
623 return VINF_SUCCESS;
624 }
625#ifdef DEBUG_bird
626 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
627 AssertMsg(Walk.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
628#endif
629 }
630
631# if 0 /* rarely useful; leave for debugging. */
632 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
633# endif
634# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
635
636 /*
637 * A common case is the not-present error caused by lazy page table syncing.
638 *
639 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
640 * here so we can safely assume that the shadow PT is present when calling
641 * SyncPage later.
642 *
643 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
644 * of mapping conflict and defer to SyncCR3 in R3.
645 * (Again, we do NOT support access handlers for non-present guest pages.)
646 *
647 */
648# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
649 Assert(GstWalk.Pde.u & X86_PDE_P);
650# endif
651 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
652 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
653 {
654 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
655# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
656 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
657 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
658# else
659 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
660 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
661# endif
662 if (RT_SUCCESS(rc))
663 return rc;
664 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
665 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
666 return VINF_PGM_SYNC_CR3;
667 }
668
669 /*
670 * Check if this fault address is flagged for special treatment,
671 * which means we'll have to figure out the physical address and
672 * check flags associated with it.
673 *
674 * ASSUME that we can limit any special access handling to pages
675 * in page tables which the guest believes to be present.
676 */
677# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
678 RTGCPHYS GCPhys = Walk.GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
679# else
680 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)PAGE_OFFSET_MASK);
681# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
682 PPGMPAGE pPage;
683 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
684 if (RT_FAILURE(rc))
685 {
686 /*
687 * When the guest accesses invalid physical memory (e.g. probing
688 * of RAM or accessing a remapped MMIO range), then we'll fall
689 * back to the recompiler to emulate the instruction.
690 */
691 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
692 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
693 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
694 return VINF_EM_RAW_EMULATE_INSTR;
695 }
696
697 /*
698 * Any handlers for this page?
699 */
700 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
701# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
702 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
703 &Walk, &GstWalk));
704# else
705 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
706# endif
707
708 /*
709 * We are here only if page is present in Guest page tables and
710 * trap is not handled by our handlers.
711 *
712 * Check it for page out-of-sync situation.
713 */
714 if (!(uErr & X86_TRAP_PF_P))
715 {
716 /*
717 * Page is not present in our page tables. Try to sync it!
718 */
719 if (uErr & X86_TRAP_PF_US)
720 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
721 else /* supervisor */
722 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
723
724 if (PGM_PAGE_IS_BALLOONED(pPage))
725 {
726 /* Emulate reads from ballooned pages as they are not present in
727 our shadow page tables. (Required for e.g. Solaris guests; soft
728 ecc, random nr generator.) */
729 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
730 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
731 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
732 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
733 return rc;
734 }
735
736# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
737 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
738# else
739 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
740# endif
741 if (RT_SUCCESS(rc))
742 {
743 /* The page was successfully synced, return to the guest. */
744 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
745 return VINF_SUCCESS;
746 }
747 }
748 else /* uErr & X86_TRAP_PF_P: */
749 {
750 /*
751 * Write protected pages are made writable when the guest makes the
752 * first write to it. This happens for pages that are shared, write
753 * monitored or not yet allocated.
754 *
755 * We may also end up here when CR0.WP=0 in the guest.
756 *
757 * Also, a side effect of not flushing global PDEs are out of sync
758 * pages due to physical monitored regions, that are no longer valid.
759 * Assume for now it only applies to the read/write flag.
760 */
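 /* A page whose state isn't ALLOCATED (zero, shared or write-monitored) must be
    replaced or un-monitored before the guest can write to it; that is what
    pgmPhysPageMakeWritable does below. */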
761 if (uErr & X86_TRAP_PF_RW)
762 {
763 /*
764 * Check if it is a read-only page.
765 */
766 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
767 {
768 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
769 Assert(!PGM_PAGE_IS_ZERO(pPage));
770 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
771 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
772
773 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
774 if (rc != VINF_SUCCESS)
775 {
776 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
777 return rc;
778 }
779 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
780 return VINF_EM_NO_MEMORY;
781 }
782
783# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
784 /*
785 * Check to see if we need to emulate the instruction if CR0.WP=0.
786 */
787 if ( !(Walk.fEffective & PGM_PTATTRS_W_MASK)
788 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
789 && CPUMGetGuestCPL(pVCpu) < 3)
790 {
791 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
792
793 /*
794 * The Netware WP0+RO+US hack.
795 *
796 * Netware sometimes(/always?) runs with WP0. It has been observed doing
797 * excessive write accesses to pages which are mapped with US=1 and RW=0
798 * while WP=0. This causes a lot of exits and extremely slow execution.
799 * To avoid trapping and emulating every write here, we change the shadow
800 * page table entry to map it as US=0 and RW=1 until user mode tries to
801 * access it again (see further below). We count these shadow page table
802 * changes so we can avoid having to clear the page pool every time the WP
803 * bit changes to 1 (see PGMCr0WpEnabled()).
804 */
805# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
806 if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
807 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
808 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
809 {
810 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, Walk.fBigPage));
811 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, Walk.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
812 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
813 {
814 PGM_INVL_PG(pVCpu, pvFault);
815 pVCpu->pgm.s.cNetwareWp0Hacks++;
816 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
817 return rc;
818 }
819 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
820 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
821 }
822# endif
823
824 /* Interpret the access. */
825 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
826 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), Walk.fBigPage, !!(Walk.fEffective & PGM_PTATTRS_US_MASK)));
827 if (RT_SUCCESS(rc))
828 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
829 else
830 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
831 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
832 return rc;
833 }
834# endif
835 /// @todo count the above case; else
836 if (uErr & X86_TRAP_PF_US)
837 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
838 else /* supervisor */
839 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
840
841 /*
842 * Sync the page.
843 *
844 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
845 * page is not present, which is not true in this case.
846 */
847# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
848 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
849# else
850 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
851# endif
852 if (RT_SUCCESS(rc))
853 {
854 /*
855 * Page was successfully synced, return to guest but invalidate
856 * the TLB first as the page is very likely to be in it.
857 */
858# if PGM_SHW_TYPE == PGM_TYPE_EPT
859 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
860# else
861 PGM_INVL_PG(pVCpu, pvFault);
862# endif
863# ifdef VBOX_STRICT
864 PGMPTWALK GstPageWalk;
865 GstPageWalk.GCPhys = RTGCPHYS_MAX;
866 if (!pVM->pgm.s.fNestedPaging)
867 {
868 rc = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
869 AssertMsg(RT_SUCCESS(rc) && ((GstPageWalk.fEffective & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, GstPageWalk.fEffective));
870 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GstPageWalk.GCPhys, GstPageWalk.fEffective));
871 }
872# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
873 uint64_t fPageShw = 0;
874 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
875 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
876 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GstPageWalk.GCPhys, fPageGst, pvFault));
877# endif
878# endif /* VBOX_STRICT */
879 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
880 return VINF_SUCCESS;
881 }
882 }
883# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
884 /*
885 * Check for Netware WP0+RO+US hack from above and undo it when user
886 * mode accesses the page again.
887 */
888 else if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
889 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
890 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
891 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
892 && CPUMGetGuestCPL(pVCpu) == 3
893 && pVM->cCpus == 1
894 )
895 {
896 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
897 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
898 if (RT_SUCCESS(rc))
899 {
900 PGM_INVL_PG(pVCpu, pvFault);
901 pVCpu->pgm.s.cNetwareWp0Hacks--;
902 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
903 return VINF_SUCCESS;
904 }
905 }
906# endif /* PGM_WITH_PAGING */
907
908 /** @todo else: why are we here? */
909
910# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
911 /*
912 * Check for VMM page flags vs. Guest page flags consistency.
913 * Currently only for debug purposes.
914 */
915 if (RT_SUCCESS(rc))
916 {
917 /* Get guest page flags. */
918 PGMPTWALK GstPageWalk;
919 int rc2 = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
920 if (RT_SUCCESS(rc2))
921 {
922 uint64_t fPageShw = 0;
923 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
924
925#if 0
926 /*
927 * Compare page flags.
928 * Note: we have AVL, A, D bits desynced.
929 */
930 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
931 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
932 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
933 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
934 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
935 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
936 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
937 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
938 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
93901:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
94001:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
941
94201:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
94301:01:15.625516 00:08:43.268051 Location :
944e:\vbox\svn\trunk\srcPage flags mismatch!
945pvFault=fffff801b0d7b000
946 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
947GCPhys=0000000019b52000
948fPageShw=0
949fPageGst=77b0000000000121
950rc=0
951#endif
952
953 }
954 else
955 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
956 }
957 else
958 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
959# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
960 }
961
962
963 /*
 964 * If we get here it is because something failed above, i.e. most likely guru
 965 * meditation time.
966 */
967 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
968 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs.Sel, pRegFrame->rip));
969 return rc;
970
971# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
972 NOREF(uErr); NOREF(pRegFrame); NOREF(pvFault);
973 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
974 return VERR_PGM_NOT_USED_IN_MODE;
975# endif
976}
977
978#endif /* !IN_RING3 */
979
980
981/**
982 * Emulation of the invlpg instruction.
983 *
984 *
985 * @returns VBox status code.
986 *
987 * @param pVCpu The cross context virtual CPU structure.
988 * @param GCPtrPage Page to invalidate.
989 *
990 * @remark ASSUMES that the guest is updating before invalidating. This order
991 * isn't required by the CPU, so this is speculative and could cause
992 * trouble.
993 * @remark No TLB shootdown is done on any other VCPU as we assume that
994 * invlpg emulation is the *only* reason for calling this function.
995 * (The guest has to shoot down TLB entries on other CPUs itself)
996 * Currently true, but keep in mind!
997 *
998 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
999 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1000 */
1001PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
1002{
1003#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1004 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
1005 && PGM_SHW_TYPE != PGM_TYPE_NONE
1006 int rc;
1007 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1008 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1009
1010 PGM_LOCK_ASSERT_OWNER(pVM);
1011
1012 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1013
1014 /*
1015 * Get the shadow PD entry and skip out if this PD isn't present.
1016 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1017 */
1018# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1019 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1020 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1021
1022 /* Fetch the pgm pool shadow descriptor. */
1023 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1024# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1025 if (!pShwPde)
1026 {
1027 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1028 return VINF_SUCCESS;
1029 }
1030# else
1031 Assert(pShwPde);
1032# endif
1033
1034# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1035 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1036 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1037
1038 /* If the shadow PDPE isn't present, then skip the invalidate. */
1039# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1040 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1041# else
1042 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1043# endif
1044 {
1045 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1046 PGM_INVL_PG(pVCpu, GCPtrPage);
1047 return VINF_SUCCESS;
1048 }
1049
1050 /* Fetch the pgm pool shadow descriptor. */
1051 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1052 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1053
1054 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1055 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1056 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1057
1058# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1059 /* PML4 */
1060 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1061 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1062 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1063 PX86PDPAE pPDDst;
1064 PX86PDPT pPdptDst;
1065 PX86PML4E pPml4eDst;
1066 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1067 if (rc != VINF_SUCCESS)
1068 {
1069 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1070 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1071 PGM_INVL_PG(pVCpu, GCPtrPage);
1072 return VINF_SUCCESS;
1073 }
1074 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1075 Assert(pPDDst);
1076 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1077
1078 /* Fetch the pgm pool shadow descriptor. */
1079 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1080 Assert(pShwPde);
1081
1082# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1083
1084 const SHWPDE PdeDst = *pPdeDst;
1085 if (!(PdeDst.u & X86_PDE_P))
1086 {
1087 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1088 PGM_INVL_PG(pVCpu, GCPtrPage);
1089 return VINF_SUCCESS;
1090 }
1091
1092 /*
1093 * Get the guest PD entry and calc big page.
1094 */
1095# if PGM_GST_TYPE == PGM_TYPE_32BIT
1096 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1097 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1098 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1099# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1100 unsigned iPDSrc = 0;
1101# if PGM_GST_TYPE == PGM_TYPE_PAE
1102 X86PDPE PdpeSrcIgn;
1103 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1104# else /* AMD64 */
1105 PX86PML4E pPml4eSrcIgn;
1106 X86PDPE PdpeSrcIgn;
1107 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1108# endif
1109 GSTPDE PdeSrc;
1110
1111 if (pPDSrc)
1112 PdeSrc = pPDSrc->a[iPDSrc];
1113 else
1114 PdeSrc.u = 0;
1115# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1116 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1117 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1118 if (fWasBigPage != fIsBigPage)
1119 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1120
1121# ifdef IN_RING3
1122 /*
1123 * If a CR3 Sync is pending we may ignore the invalidate page operation
1124 * depending on the kind of sync and if it's a global page or not.
1125 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1126 */
1127# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1128 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1129 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1130 && fIsBigPage
1131 && (PdeSrc.u & X86_PDE4M_G)
1132 )
1133 )
1134# else
1135 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1136# endif
1137 {
1138 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1139 return VINF_SUCCESS;
1140 }
1141# endif /* IN_RING3 */
1142
1143 /*
1144 * Deal with the Guest PDE.
1145 */
1146 rc = VINF_SUCCESS;
1147 if (PdeSrc.u & X86_PDE_P)
1148 {
1149 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1150 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1151 if (!fIsBigPage)
1152 {
1153 /*
1154 * 4KB - page.
1155 */
1156 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1157 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1158
1159# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1160 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1161 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (PAGE_SIZE / 2)));
1162# endif
1163 if (pShwPage->GCPhys == GCPhys)
1164 {
1165 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1166 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1167
1168 PGSTPT pPTSrc;
1169 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1170 if (RT_SUCCESS(rc))
1171 {
1172 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1173 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1174 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1175 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1176 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1177 GCPtrPage, PteSrc.u & X86_PTE_P,
1178 (PteSrc.u & PdeSrc.u & X86_PTE_RW),
1179 (PteSrc.u & PdeSrc.u & X86_PTE_US),
1180 (uint64_t)PteSrc.u,
1181 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1182 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1183 }
1184 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1185 PGM_INVL_PG(pVCpu, GCPtrPage);
1186 }
1187 else
1188 {
1189 /*
1190 * The page table address changed.
1191 */
1192 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1193 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1194 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1195 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1196 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1197 PGM_INVL_VCPU_TLBS(pVCpu);
1198 }
1199 }
1200 else
1201 {
1202 /*
1203 * 2/4MB - page.
1204 */
1205 /* Before freeing the page, check if anything really changed. */
1206 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1207 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1208# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1209 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1210 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1211# endif
1212 if ( pShwPage->GCPhys == GCPhys
1213 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1214 {
 1215 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1216 /** @todo This test is wrong as it cannot check the G bit!
1217 * FIXME */
1218 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1219 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1220 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1221 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1222 {
1223 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1224 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1225 return VINF_SUCCESS;
1226 }
1227 }
1228
1229 /*
1230 * Ok, the page table is present and it's been changed in the guest.
1231 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1232 * We could do this for some flushes in GC too, but we need an algorithm for
1233 * deciding which 4MB pages containing code likely to be executed very soon.
1234 */
1235 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1236 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1237 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1238 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1239 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1240 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1241 }
1242 }
1243 else
1244 {
1245 /*
1246 * Page directory is not present, mark shadow PDE not present.
1247 */
1248 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1249 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1250 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1251 PGM_INVL_PG(pVCpu, GCPtrPage);
1252 }
1253 return rc;
1254
1255#else /* guest real and protected mode, nested + ept, none. */
1256 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1257 NOREF(pVCpu); NOREF(GCPtrPage);
1258 return VINF_SUCCESS;
1259#endif
1260}
1261
1262#if PGM_SHW_TYPE != PGM_TYPE_NONE
1263
1264/**
1265 * Update the tracking of shadowed pages.
1266 *
1267 * @param pVCpu The cross context virtual CPU structure.
1268 * @param pShwPage The shadow page.
 1269 * @param HCPhys The physical page that is being dereferenced.
1270 * @param iPte Shadow PTE index
1271 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1272 */
1273DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1274 RTGCPHYS GCPhysPage)
1275{
1276 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1277
1278# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1279 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1280 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1281
1282 /* Use the hint we retrieved from the cached guest PT. */
1283 if (pShwPage->fDirty)
1284 {
1285 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1286
1287 Assert(pShwPage->cPresent);
1288 Assert(pPool->cPresent);
1289 pShwPage->cPresent--;
1290 pPool->cPresent--;
1291
1292 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1293 AssertRelease(pPhysPage);
1294 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1295 return;
1296 }
1297# else
1298 NOREF(GCPhysPage);
1299# endif
1300
1301 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1302 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1303
1304 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1305 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1306 * 2. write protect all shadowed pages. I.e. implement caching.
1307 */
1308 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1309
1310 /*
1311 * Find the guest address.
1312 */
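 /* No guest-physical hint is available, so fall back to a linear scan of every
    RAM range for the page whose host-physical address matches HCPhys.  This is
    the expensive path the @todo above is talking about. */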
1313 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1314 pRam;
1315 pRam = pRam->CTX_SUFF(pNext))
1316 {
1317 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1318 while (iPage-- > 0)
1319 {
1320 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1321 {
1322 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1323
1324 Assert(pShwPage->cPresent);
1325 Assert(pPool->cPresent);
1326 pShwPage->cPresent--;
1327 pPool->cPresent--;
1328
1329 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1330 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1331 return;
1332 }
1333 }
1334 }
1335
1336 for (;;)
1337 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1338}
1339
1340
1341/**
1342 * Update the tracking of shadowed pages.
1343 *
1344 * @param pVCpu The cross context virtual CPU structure.
1345 * @param pShwPage The shadow page.
1346 * @param u16 The top 16-bit of the pPage->HCPhys.
 1347 * @param pPage Pointer to the guest page. This will be modified.
1348 * @param iPTDst The index into the shadow table.
1349 */
1350DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16,
1351 PPGMPAGE pPage, const unsigned iPTDst)
1352{
1353 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1354
1355 /*
1356 * Just deal with the simple first time here.
1357 */
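 /* u16 is the page's tracking word: for the first reference it directly encodes
    cRefs=1 plus the owning shadow page's pool index; further references are
    moved to the physical cross-reference extent list by pgmPoolTrackPhysExtAddref. */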
1358 if (!u16)
1359 {
1360 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1361 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1362 /* Save the page table index. */
1363 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1364 }
1365 else
1366 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1367
1368 /* write back */
1369 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1370 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1371
1372 /* update statistics. */
1373 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1374 pShwPage->cPresent++;
1375 if (pShwPage->iFirstPresent > iPTDst)
1376 pShwPage->iFirstPresent = iPTDst;
1377}
1378
1379
1380/**
1381 * Modifies a shadow PTE to account for access handlers.
1382 *
1383 * @param pVM The cross context VM structure.
1384 * @param pPage The page in question.
1385 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1386 * A (accessed) bit so it can be emulated correctly.
1387 * @param pPteDst The shadow PTE (output). This is temporary storage and
1388 * does not need to be set atomically.
1389 */
1390DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1391{
1392 NOREF(pVM); RT_NOREF_PV(fPteSrc);
1393
1394 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1395 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1396 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1397 {
1398 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1399# if PGM_SHW_TYPE == PGM_TYPE_EPT
1400 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1401# else
1402 if (fPteSrc & X86_PTE_A)
1403 {
1404 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1405 SHW_PTE_SET_RO(*pPteDst);
1406 }
1407 else
1408 SHW_PTE_SET(*pPteDst, 0);
1409# endif
1410 }
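 /* With PGM_WITH_MMIO_OPTIMIZATIONS, pages where all accesses are handled (typically
  * MMIO) are instead pointed at the special HCPhysInvMmioPg page, deliberately
  * provoking an EPT misconfiguration or a reserved-bit #PF so these accesses stand
  * out from ordinary page faults. */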
1411# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1412# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1413 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1414 && ( BTH_IS_NP_ACTIVE(pVM)
1415 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1416# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1417 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1418# endif
1419 )
1420 {
1421 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1422# if PGM_SHW_TYPE == PGM_TYPE_EPT
1423 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1424 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1425 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1426 | EPT_E_WRITE
1427 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1428 | EPT_E_MEMTYPE_INVALID_3;
1429# else
1430 /* Set high page frame bits that MBZ (architecturally fixed on PAE, CPU dependent on AMD64). */
1431 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1432# endif
1433 }
1434# endif
1435# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1436 else
1437 {
1438 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1439 SHW_PTE_SET(*pPteDst, 0);
1440 }
1441 /** @todo count these kinds of entries. */
1442}
1443
1444
1445/**
1446 * Creates a 4K shadow page for a guest page.
1447 *
1448 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting
1449 * the physical address. Only the flags of the PdeSrc argument are used. No page
1450 * structures will be mapped in this function.
1451 *
1452 * @param pVCpu The cross context virtual CPU structure.
1453 * @param pPteDst Destination page table entry.
1454 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1455 * Can safely assume that only the flags are being used.
1456 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1457 * @param pShwPage Pointer to the shadow page.
1458 * @param iPTDst The index into the shadow table.
1459 *
1460 * @remark Not used for 2/4MB pages!
1461 */
1462# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1463static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1464 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1465# else
1466static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1467 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1468# endif
1469{
1470 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1471 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1472
1473# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1474 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1475 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1476
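 /* If this shadow PT is on the dirty-page list, keep the cached copy of the guest PT
  * up to date as well and remember which guest page the old entry pointed at, so a
  * later deref below uses the right physical address. */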
1477 if (pShwPage->fDirty)
1478 {
1479 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1480 PGSTPT pGstPT;
1481
1482 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1483 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1484 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1485 pGstPT->a[iPTDst].u = PteSrc.u;
1486 }
1487# else
1488 Assert(!pShwPage->fDirty);
1489# endif
1490
1491# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1492 if ( (PteSrc.u & X86_PTE_P)
1493 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1494# endif
1495 {
1496# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1497 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1498# endif
1499 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1500
1501 /*
1502 * Find the ram range.
1503 */
1504 PPGMPAGE pPage;
1505 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1506 if (RT_SUCCESS(rc))
1507 {
1508 /* Ignore ballooned pages.
1509 Don't return errors or use a fatal assert here as part of a
1510 shadow sync range might include ballooned pages. */
1511 if (PGM_PAGE_IS_BALLOONED(pPage))
1512 {
1513 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1514 return;
1515 }
1516
1517# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1518 /* Make the page writable if necessary. */
1519 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1520 && ( PGM_PAGE_IS_ZERO(pPage)
1521# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1522 || ( (PteSrc.u & X86_PTE_RW)
1523# else
1524 || ( 1
1525# endif
1526 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1527# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1528 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1529# endif
1530# ifdef VBOX_WITH_PAGE_SHARING
1531 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1532# endif
1533 )
1534 )
1535 )
1536 {
1537 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1538 AssertRC(rc);
1539 }
1540# endif
1541
1542 /*
1543 * Make page table entry.
1544 */
1545 SHWPTE PteDst;
1546# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1547 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1548# else
1549 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1550# endif
1551 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1552 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1553 else
1554 {
1555# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1556 /*
1557 * If the page or page directory entry is not marked accessed,
1558 * we mark the page not present.
1559 */
1560 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1561 {
1562 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1563 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1564 SHW_PTE_SET(PteDst, 0);
1565 }
1566 /*
1567 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1568 * when the page is modified.
1569 */
1570 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
1571 {
1572 AssertCompile(X86_PTE_RW == X86_PDE_RW);
1573 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
1574 SHW_PTE_SET(PteDst,
1575 fGstShwPteFlags
1576 | PGM_PAGE_GET_HCPHYS(pPage)
1577 | PGM_PTFLAGS_TRACK_DIRTY);
1578 SHW_PTE_SET_RO(PteDst);
1579 }
1580 else
1581# endif
1582 {
1583 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
1584# if PGM_SHW_TYPE == PGM_TYPE_EPT
1585 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
1586 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1587# else
1588 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1589# endif
1590 }
1591
1592 /*
1593 * Make sure only allocated pages are mapped writable.
1594 */
1595 if ( SHW_PTE_IS_P_RW(PteDst)
1596 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1597 {
1598 /* Still applies to shared pages. */
1599 Assert(!PGM_PAGE_IS_ZERO(pPage));
1600 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why isn't it? */
1601 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1602 }
1603 }
1604
1605 /*
1606 * Keep user tracking up to date.
1607 */
1608 if (SHW_PTE_IS_P(PteDst))
1609 {
1610 if (!SHW_PTE_IS_P(*pPteDst))
1611 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1612 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1613 {
1614 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1615 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1616 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1617 }
1618 }
1619 else if (SHW_PTE_IS_P(*pPteDst))
1620 {
1621 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1622 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1623 }
1624
1625 /*
1626 * Update statistics and commit the entry.
1627 */
1628# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1629 if (!(PteSrc.u & X86_PTE_G))
1630 pShwPage->fSeenNonGlobal = true;
1631# endif
1632 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1633 return;
1634 }
1635
1636/** @todo count these three different kinds. */
1637 Log2(("SyncPageWorker: invalid address in Pte\n"));
1638 }
1639# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1640 else if (!(PteSrc.u & X86_PTE_P))
1641 Log2(("SyncPageWorker: page not present in Pte\n"));
1642 else
1643 Log2(("SyncPageWorker: invalid Pte\n"));
1644# endif
1645
1646 /*
1647 * The page is not present or the PTE is bad. Replace the shadow PTE by
1648 * an empty entry, making sure to keep the user tracking up to date.
1649 */
1650 if (SHW_PTE_IS_P(*pPteDst))
1651 {
1652 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1653 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1654 }
1655 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1656}
1657
1658
1659/**
1660 * Syncs a guest OS page.
1661 *
1662 * There are no conflicts at this point, nor is there any need for
1663 * page table allocations.
1664 *
1665 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1666 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1667 *
1668 * @returns VBox status code.
1669 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1670 * @param pVCpu The cross context virtual CPU structure.
1671 * @param PdeSrc Page directory entry of the guest.
1672 * @param GCPtrPage Guest context page address.
1673 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1674 * @param uErr Fault error (X86_TRAP_PF_*).
1675 */
1676static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1677{
1678 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1679 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
1680 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1681 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
1682
1683 PGM_LOCK_ASSERT_OWNER(pVM);
1684
1685# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1686 || PGM_GST_TYPE == PGM_TYPE_PAE \
1687 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1688 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
1689
1690 /*
1691 * Assert preconditions.
1692 */
1693 Assert(PdeSrc.u & X86_PDE_P);
1694 Assert(cPages);
1695# if 0 /* rarely useful; leave for debugging. */
1696 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1697# endif
1698
1699 /*
1700 * Get the shadow PDE, find the shadow page table in the pool.
1701 */
1702# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1703 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1704 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1705
1706 /* Fetch the pgm pool shadow descriptor. */
1707 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1708 Assert(pShwPde);
1709
1710# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1711 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1712 PPGMPOOLPAGE pShwPde = NULL;
1713 PX86PDPAE pPDDst;
1714
1715 /* Fetch the pgm pool shadow descriptor. */
1716 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1717 AssertRCSuccessReturn(rc2, rc2);
1718 Assert(pShwPde);
1719
1720 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1721 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1722
1723# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1724 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1725 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1726 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1727 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1728
1729 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1730 AssertRCSuccessReturn(rc2, rc2);
1731 Assert(pPDDst && pPdptDst);
1732 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1733# endif
1734 SHWPDE PdeDst = *pPdeDst;
1735
1736 /*
1737 * - In the guest SMP case we could have blocked while another VCPU reused
1738 * this page table.
1739 * - With W7-64 we may also take this path when the A bit is cleared on
1740 * higher level tables (PDPE/PML4E). The guest does not invalidate the
1741 * relevant TLB entries. If we're write monitoring any page mapped by
1742 * the modified entry, we may end up here with a "stale" TLB entry.
1743 */
1744 if (!(PdeDst.u & X86_PDE_P))
1745 {
1746 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1747 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
1748 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1749 if (uErr & X86_TRAP_PF_P)
1750 PGM_INVL_PG(pVCpu, GCPtrPage);
1751 return VINF_SUCCESS; /* force the instruction to be executed again. */
1752 }
1753
1754 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1755 Assert(pShwPage);
1756
1757# if PGM_GST_TYPE == PGM_TYPE_AMD64
1758 /* Fetch the pgm pool shadow descriptor. */
1759 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1760 Assert(pShwPde);
1761# endif
1762
1763 /*
1764 * Check that the page is present and that the shadow PDE isn't out of sync.
1765 */
1766 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1767 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1768 RTGCPHYS GCPhys;
1769 if (!fBigPage)
1770 {
1771 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1772# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1773 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1774 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (PAGE_SIZE / 2)));
1775# endif
1776 }
1777 else
1778 {
1779 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1780# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1781 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1782 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1783# endif
1784 }
1785 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
1786 if ( fPdeValid
1787 && pShwPage->GCPhys == GCPhys
1788 && (PdeSrc.u & X86_PDE_P)
1789 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1790 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
1791# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1792 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
1793# endif
1794 )
1795 {
1796 /*
1797 * Check that the PDE is marked accessed already.
1798 * Since we set the accessed bit *before* getting here on a #PF, this
1799 * check is only meant for dealing with non-#PF'ing paths.
1800 */
1801 if (PdeSrc.u & X86_PDE_A)
1802 {
1803 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1804 if (!fBigPage)
1805 {
1806 /*
1807 * 4KB Page - Map the guest page table.
1808 */
1809 PGSTPT pPTSrc;
1810 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1811 if (RT_SUCCESS(rc))
1812 {
1813# ifdef PGM_SYNC_N_PAGES
1814 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1815 if ( cPages > 1
1816 && !(uErr & X86_TRAP_PF_P)
1817 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
1818 {
1819 /*
1820 * This code path is currently only taken when the caller is PGMTrap0eHandler
1821 * for non-present pages!
1822 *
1823 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1824 * deal with locality.
1825 */
1826 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1827# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1828 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1829 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1830# else
1831 const unsigned offPTSrc = 0;
1832# endif
1833 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1834 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1835 iPTDst = 0;
1836 else
1837 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1838
1839 for (; iPTDst < iPTDstEnd; iPTDst++)
1840 {
1841 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
1842
1843 if ( (pPteSrc->u & X86_PTE_P)
1844 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1845 {
1846 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1847 NOREF(GCPtrCurPage);
1848 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
1849 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1850 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
1851 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
1852 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
1853 (uint64_t)pPteSrc->u,
1854 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1855 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1856 }
1857 }
1858 }
1859 else
1860# endif /* PGM_SYNC_N_PAGES */
1861 {
1862 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1863 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1864 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1865 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1866 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1867 GCPtrPage, PteSrc.u & X86_PTE_P,
1868 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
1869 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
1870 (uint64_t)PteSrc.u,
1871 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1872 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1873 }
1874 }
1875 else /* MMIO or invalid page: emulated in #PF handler. */
1876 {
1877 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1878 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
1879 }
1880 }
1881 else
1882 {
1883 /*
1884 * 4/2MB page - lazy syncing shadow 4K pages.
1885 * (There are many causes of getting here; it's no longer only CSAM.)
1886 */
1887 /* Calculate the GC physical address of this 4KB shadow page. */
1888 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
1889 /* Find ram range. */
1890 PPGMPAGE pPage;
1891 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
1892 if (RT_SUCCESS(rc))
1893 {
1894 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1895
1896# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1897 /* Try to make the page writable if necessary. */
1898 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1899 && ( PGM_PAGE_IS_ZERO(pPage)
1900 || ( (PdeSrc.u & X86_PDE_RW)
1901 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1902# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1903 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1904# endif
1905# ifdef VBOX_WITH_PAGE_SHARING
1906 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1907# endif
1908 )
1909 )
1910 )
1911 {
1912 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1913 AssertRC(rc);
1914 }
1915# endif
1916
1917 /*
1918 * Make shadow PTE entry.
1919 */
1920 SHWPTE PteDst;
1921 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1922 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
1923 else
1924 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
1925
1926 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1927 if ( SHW_PTE_IS_P(PteDst)
1928 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1929 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1930
1931 /* Make sure only allocated pages are mapped writable. */
1932 if ( SHW_PTE_IS_P_RW(PteDst)
1933 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1934 {
1935 /* Still applies to shared pages. */
1936 Assert(!PGM_PAGE_IS_ZERO(pPage));
1937 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
1938 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1939 }
1940
1941 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
1942
1943 /*
1944 * If the page is not flagged as dirty and is writable, then make it read-only
1945 * at PD level, so we can set the dirty bit when the page is modified.
1946 *
1947 * ASSUMES that page access handlers are implemented on page table entry level.
1948 * Thus we will first catch the dirty access and set PDE.D and restart. If
1949 * there is an access handler, we'll trap again and let it work on the problem.
1950 */
1951 /** @todo r=bird: figure out why we need this here; SyncPT should've taken care of this already.
1952 * As for invlpg, it simply frees the whole shadow PT.
1953 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1954 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
1955 {
1956 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
1957 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1958 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
1959 }
1960 else
1961 {
1962 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
1963 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
1964 }
1965 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
1966 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1967 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
1968 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1969 }
1970 else
1971 {
1972 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1973 /** @todo must wipe the shadow page table entry in this
1974 * case. */
1975 }
1976 }
1977 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
1978 return VINF_SUCCESS;
1979 }
1980
1981 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
1982 }
1983 else if (fPdeValid)
1984 {
1985 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1986 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1987 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1988 }
1989 else
1990 {
1991/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
1992 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1993 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1994 }
1995
1996 /*
1997 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1998 * Yea, I'm lazy.
1999 */
2000 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2001 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
2002
2003 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2004 PGM_INVL_VCPU_TLBS(pVCpu);
2005 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2006
2007
2008# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2009 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2010 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2011 NOREF(PdeSrc);
2012
2013# ifdef PGM_SYNC_N_PAGES
2014 /*
2015 * Get the shadow PDE, find the shadow page table in the pool.
2016 */
2017# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2018 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2019
2020# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2021 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2022
2023# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2024 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2025 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2026 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2027 X86PDEPAE PdeDst;
2028 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2029
2030 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2031 AssertRCSuccessReturn(rc, rc);
2032 Assert(pPDDst && pPdptDst);
2033 PdeDst = pPDDst->a[iPDDst];
2034
2035# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2036 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2037 PEPTPD pPDDst;
2038 EPTPDE PdeDst;
2039
2040 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2041 if (rc != VINF_SUCCESS)
2042 {
2043 AssertRC(rc);
2044 return rc;
2045 }
2046 Assert(pPDDst);
2047 PdeDst = pPDDst->a[iPDDst];
2048# endif
2049 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2050 if (!SHW_PDE_IS_P(PdeDst))
2051 {
2052 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2053 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2054 return VINF_SUCCESS; /* force the instruction to be executed again. */
2055 }
2056
2057 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2058 if (SHW_PDE_IS_BIG(PdeDst))
2059 {
2060 Assert(pVM->pgm.s.fNestedPaging);
2061 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2062 return VINF_SUCCESS;
2063 }
2064
2065 /* Mask away the page offset. */
2066 GCPtrPage &= ~((RTGCPTR)0xfff);
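 /* No guest paging in these modes, so the GCPhysPage handed to SyncPageWorker below
  * is computed directly from GCPtrPage. */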
2067
2068 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2069 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2070
2071 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2072 if ( cPages > 1
2073 && !(uErr & X86_TRAP_PF_P)
2074 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2075 {
2076 /*
2077 * This code path is currently only taken when the caller is PGMTrap0eHandler
2078 * for non-present pages!
2079 *
2080 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2081 * deal with locality.
2082 */
2083 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2084 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2085 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2086 iPTDst = 0;
2087 else
2088 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2089 for (; iPTDst < iPTDstEnd; iPTDst++)
2090 {
2091 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2092 {
2093 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2094 | (iPTDst << PAGE_SHIFT));
2095
2096 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2097 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2098 GCPtrCurPage,
2099 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2100 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2101
2102 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2103 break;
2104 }
2105 else
2106 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2107 }
2108 }
2109 else
2110# endif /* PGM_SYNC_N_PAGES */
2111 {
2112 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2113 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2114 | (iPTDst << PAGE_SHIFT));
2115
2116 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2117
2118 Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2119 GCPtrPage,
2120 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2121 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2122 }
2123 return VINF_SUCCESS;
2124
2125# else
2126 NOREF(PdeSrc);
2127 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2128 return VERR_PGM_NOT_USED_IN_MODE;
2129# endif
2130}
2131
2132#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2133#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
2134
2135/**
2136 * CheckPageFault helper for returning a page fault indicating a non-present
2137 * (NP) entry in the page translation structures.
2138 *
2139 * @returns VINF_EM_RAW_GUEST_TRAP.
2140 * @param pVCpu The cross context virtual CPU structure.
2141 * @param uErr The error code of the shadow fault. Corrections to
2142 * TRPM's copy will be made if necessary.
2143 * @param GCPtrPage For logging.
2144 * @param uPageFaultLevel For logging.
2145 */
2146DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2147{
2148 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2149 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2150 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
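 /* The guest entry is not present, so the error code delivered to the guest must not
  * have the P or RSVD bits set; strip them from TRPM's copy if the shadow fault had them. */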
2151 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2152 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2153
2154 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2155 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2156 return VINF_EM_RAW_GUEST_TRAP;
2157}
2158
2159
2160/**
2161 * CheckPageFault helper for returning a page fault indicating a reserved bit
2162 * (RSVD) error in the page translation structures.
2163 *
2164 * @returns VINF_EM_RAW_GUEST_TRAP.
2165 * @param pVCpu The cross context virtual CPU structure.
2166 * @param uErr The error code of the shadow fault. Corrections to
2167 * TRPM's copy will be made if necessary.
2168 * @param GCPtrPage For logging.
2169 * @param uPageFaultLevel For logging.
2170 */
2171DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2172{
2173 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2174 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2175 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2176
2177 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2178 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2179 return VINF_EM_RAW_GUEST_TRAP;
2180}
2181
2182
2183/**
2184 * CheckPageFault helper for returning a page protection fault (P).
2185 *
2186 * @returns VINF_EM_RAW_GUEST_TRAP.
2187 * @param pVCpu The cross context virtual CPU structure.
2188 * @param uErr The error code of the shadow fault. Corrections to
2189 * TRPM's copy will be made if necessary.
2190 * @param GCPtrPage For logging.
2191 * @param uPageFaultLevel For logging.
2192 */
2193DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2194{
2195 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2196 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2197 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2198 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2199
2200 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2201 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2202 return VINF_EM_RAW_GUEST_TRAP;
2203}
2204
2205
2206/**
2207 * Handle dirty bit tracking faults.
2208 *
2209 * @returns VBox status code.
2210 * @param pVCpu The cross context virtual CPU structure.
2211 * @param uErr Page fault error code.
2212 * @param pPdeDst Shadow page directory entry.
2213 * @param pPdeSrc Guest page directory entry.
2214 * @param GCPtrPage Guest context page address.
2215 */
2216static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
2217 RTGCPTR GCPtrPage)
2218{
2219 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2220 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2221 NOREF(uErr);
2222
2223 PGM_LOCK_ASSERT_OWNER(pVM);
2224
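 /*
  * Quick recap of the dirty-bit tracking scheme: writable guest pages that are not
  * yet marked dirty are shadowed read-only with PGM_PDFLAGS_TRACK_DIRTY (or
  * PGM_PTFLAGS_TRACK_DIRTY) set. The first write faults into this function, which
  * sets the A/D bits on the shadow entry, restores write access and restarts the
  * instruction.
  */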
2225 /*
2226 * Handle big page.
2227 */
2228 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
2229 {
2230 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2231 {
2232 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2233 Assert(pPdeSrc->u & X86_PDE_RW);
2234
2235 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2236 * fault again and take this path to only invalidate the entry (see below). */
2237 SHWPDE PdeDst = *pPdeDst;
2238 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
2239 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
2240 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2241 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2242 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2243 }
2244
2245# ifdef IN_RING0
2246 /* Check for stale TLB entry; only applies to the SMP guest case. */
2247 if ( pVM->cCpus > 1
2248 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
2249 {
2250 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2251 if (pShwPage)
2252 {
2253 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2254 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2255 if (SHW_PTE_IS_P_RW(*pPteDst))
2256 {
2257 /* Stale TLB entry. */
2258 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2259 PGM_INVL_PG(pVCpu, GCPtrPage);
2260 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2261 }
2262 }
2263 }
2264# endif /* IN_RING0 */
2265 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2266 }
2267
2268 /*
2269 * Map the guest page table.
2270 */
2271 PGSTPT pPTSrc;
2272 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2273 AssertRCReturn(rc, rc);
2274
2275 if (SHW_PDE_IS_P(*pPdeDst))
2276 {
2277 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2278 const GSTPTE PteSrc = *pPteSrc;
2279
2280 /*
2281 * Map shadow page table.
2282 */
2283 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2284 if (pShwPage)
2285 {
2286 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2287 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2288 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2289 {
2290 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2291 {
2292 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
2293 SHWPTE PteDst = *pPteDst;
2294
2295 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2296 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2297
2298 Assert(PteSrc.u & X86_PTE_RW);
2299
2300 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2301 * entry will not harm; write access will simply fault again and
2302 * take this path to only invalidate the entry.
2303 */
2304 if (RT_LIKELY(pPage))
2305 {
2306 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2307 {
2308 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2309 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2310 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2311 SHW_PTE_SET_RO(PteDst);
2312 }
2313 else
2314 {
2315 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2316 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2317 {
2318 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
2319 AssertRC(rc);
2320 }
2321 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2322 SHW_PTE_SET_RW(PteDst);
2323 else
2324 {
2325 /* Still applies to shared pages. */
2326 Assert(!PGM_PAGE_IS_ZERO(pPage));
2327 SHW_PTE_SET_RO(PteDst);
2328 }
2329 }
2330 }
2331 else
2332 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2333
2334 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2335 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2336 PGM_INVL_PG(pVCpu, GCPtrPage);
2337 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2338 }
2339
2340# ifdef IN_RING0
2341 /* Check for stale TLB entry; only applies to the SMP guest case. */
2342 if ( pVM->cCpus > 1
2343 && SHW_PTE_IS_RW(*pPteDst)
2344 && SHW_PTE_IS_A(*pPteDst))
2345 {
2346 /* Stale TLB entry. */
2347 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2348 PGM_INVL_PG(pVCpu, GCPtrPage);
2349 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2350 }
2351# endif
2352 }
2353 }
2354 else
2355 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2356 }
2357
2358 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2359}
2360
2361#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
2362
2363/**
2364 * Sync a shadow page table.
2365 *
2366 * The shadow page table is not present in the shadow PDE.
2367 *
2368 * Handles mapping conflicts.
2369 *
2370 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2371 * conflict), and Trap0eHandler.
2372 *
2373 * A precondition for this method is that the shadow PDE is not present. The
2374 * caller must take the PGM lock before checking this and continue to hold it
2375 * when calling this method.
2376 *
2377 * @returns VBox status code.
2378 * @param pVCpu The cross context virtual CPU structure.
2379 * @param iPDSrc Page directory index.
2380 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2381 * Assume this is a temporary mapping.
2382 * @param GCPtrPage GC Pointer of the page that caused the fault
2383 */
2384static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2385{
2386 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2387 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2388
2389#if 0 /* rarely useful; leave for debugging. */
2390 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2391#endif
2392 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2393
2394 PGM_LOCK_ASSERT_OWNER(pVM);
2395
2396#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2397 || PGM_GST_TYPE == PGM_TYPE_PAE \
2398 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2399 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
2400 && PGM_SHW_TYPE != PGM_TYPE_NONE
2401 int rc = VINF_SUCCESS;
2402
2403 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2404
2405 /*
2406 * Some input validation first.
2407 */
2408 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2409
2410 /*
2411 * Get the relevant shadow PDE entry.
2412 */
2413# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2414 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2415 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2416
2417 /* Fetch the pgm pool shadow descriptor. */
2418 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2419 Assert(pShwPde);
2420
2421# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2422 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2423 PPGMPOOLPAGE pShwPde = NULL;
2424 PX86PDPAE pPDDst;
2425 PSHWPDE pPdeDst;
2426
2427 /* Fetch the pgm pool shadow descriptor. */
2428 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2429 AssertRCSuccessReturn(rc, rc);
2430 Assert(pShwPde);
2431
2432 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2433 pPdeDst = &pPDDst->a[iPDDst];
2434
2435# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2436 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2437 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2438 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2439 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2440 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2441 AssertRCSuccessReturn(rc, rc);
2442 Assert(pPDDst);
2443 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2444
2445# endif
2446 SHWPDE PdeDst = *pPdeDst;
2447
2448# if PGM_GST_TYPE == PGM_TYPE_AMD64
2449 /* Fetch the pgm pool shadow descriptor. */
2450 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2451 Assert(pShwPde);
2452# endif
2453
2454 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P. */
2455
2456 /*
2457 * Sync the page directory entry.
2458 */
2459 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2460 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
2461 if ( (PdeSrc.u & X86_PDE_P)
2462 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2463 {
2464 /*
2465 * Allocate & map the page table.
2466 */
2467 PSHWPT pPTDst;
2468 PPGMPOOLPAGE pShwPage;
2469 RTGCPHYS GCPhys;
2470 if (fPageTable)
2471 {
2472 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2473# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2474 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2475 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (PAGE_SIZE / 2)));
2476# endif
2477 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
2478 pShwPde->idx, iPDDst, false /*fLockPage*/,
2479 &pShwPage);
2480 }
2481 else
2482 {
2483 PGMPOOLACCESS enmAccess;
2484# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2485 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
2486# else
2487 const bool fNoExecute = false;
2488# endif
2489
2490 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2491# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2492 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2493 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2494# endif
2495 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
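 /* (The RW/US/NX of the big PDE ends up in every PTE of the shadow page table, so
  * the access type must distinguish cached pool entries as well.) */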
2496 if (PdeSrc.u & X86_PDE_US)
2497 {
2498 if (PdeSrc.u & X86_PDE_RW)
2499 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2500 else
2501 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2502 }
2503 else
2504 {
2505 if (PdeSrc.u & X86_PDE_RW)
2506 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2507 else
2508 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2509 }
2510 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
2511 pShwPde->idx, iPDDst, false /*fLockPage*/,
2512 &pShwPage);
2513 }
2514 if (rc == VINF_SUCCESS)
2515 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2516 else if (rc == VINF_PGM_CACHED_PAGE)
2517 {
2518 /*
2519 * The PT was cached, just hook it up.
2520 */
2521 if (fPageTable)
2522 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2523 else
2524 {
2525 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2526 /* (see explanation and assumptions further down.) */
2527 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2528 {
2529 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2530 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2531 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2532 }
2533 }
2534 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2535 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2536 return VINF_SUCCESS;
2537 }
2538 else
2539 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2540 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2541 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2542 * irrelevant at this point. */
2543 PdeDst.u &= X86_PDE_AVL_MASK;
2544 PdeDst.u |= pShwPage->Core.Key;
2545
2546 /*
2547 * Page directory has been accessed (this is a fault situation, remember).
2548 */
2549 /** @todo
2550 * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2551 * fault situation. What's more, the Trap0eHandler has already set the
2552 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2553 * might need setting the accessed flag.
2554 *
2555 * The best idea is to leave this change to the caller and add an
2556 * assertion that it's set already. */
2557 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
2558 if (fPageTable)
2559 {
2560 /*
2561 * Page table - 4KB.
2562 *
2563 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2564 */
2565 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2566 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
2567 PGSTPT pPTSrc;
2568 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2569 if (RT_SUCCESS(rc))
2570 {
2571 /*
2572 * Start by syncing the page directory entry so CSAM's TLB trick works.
2573 */
2574 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2575 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2576 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2577 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2578
2579 /*
2580 * Directory/page user or supervisor privilege: (same goes for read/write)
2581 *
2582 * Directory Page Combined
2583 * U/S U/S U/S
2584 * 0 0 0
2585 * 0 1 0
2586 * 1 0 0
2587 * 1 1 1
2588 *
2589 * Simple AND operation. Table listed for completeness.
2590 *
2591 */
2592 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
2593# ifdef PGM_SYNC_N_PAGES
2594 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2595 unsigned iPTDst = iPTBase;
2596 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2597 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2598 iPTDst = 0;
2599 else
2600 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2601# else /* !PGM_SYNC_N_PAGES */
2602 unsigned iPTDst = 0;
2603 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2604# endif /* !PGM_SYNC_N_PAGES */
2605 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2606 | ((RTGCPTR)iPTDst << PAGE_SHIFT);
2607# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2608 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2609 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2610# else
2611 const unsigned offPTSrc = 0;
2612# endif
2613 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += PAGE_SIZE)
2614 {
2615 const unsigned iPTSrc = iPTDst + offPTSrc;
2616 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2617 if (PteSrc.u & X86_PTE_P)
2618 {
2619 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2620 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2621 GCPtrCur,
2622 PteSrc.u & X86_PTE_P,
2623 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2624 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2625 (uint64_t)PteSrc.u,
2626 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2627 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2628 }
2629 /* else: the page table was cleared by the pool */
2630 } /* for PTEs */
2631 }
2632 }
2633 else
2634 {
2635 /*
2636 * Big page - 2/4MB.
2637 *
2638 * We'll walk the ram range list in parallel and optimize lookups.
2639 * We will only sync one shadow page table at a time.
2640 */
2641 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
2642
2643 /**
2644 * @todo It might be more efficient to sync only a part of the 4MB
2645 * page (similar to what we do for 4KB PDs).
2646 */
2647
2648 /*
2649 * Start by syncing the page directory entry.
2650 */
2651 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2652 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2653
2654 /*
2655 * If the page is not flagged as dirty and is writable, then make it read-only
2656 * at PD level, so we can set the dirty bit when the page is modified.
2657 *
2658 * ASSUMES that page access handlers are implemented on page table entry level.
2659 * Thus we will first catch the dirty access and set PDE.D and restart. If
2660 * there is an access handler, we'll trap again and let it work on the problem.
2661 */
2662 /** @todo move the above stuff to a section in the PGM documentation. */
2663 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2664 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2665 {
2666 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2667 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2668 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2669 }
2670 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2671 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2672
2673 /*
2674 * Fill the shadow page table.
2675 */
2676 /* Get address and flags from the source PDE. */
2677 SHWPTE PteDstBase;
2678 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2679
2680 /* Loop thru the entries in the shadow PT. */
2681 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2682 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2683 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
2684 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2685 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2686 unsigned iPTDst = 0;
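 /* Walk the shadow PT and the RAM range list in lock step: entries backed by a RAM
  * page get a real PTE, while holes are left not-present and handled manually at
  * #PF time. */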
2687 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2688 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2689 {
2690 if (pRam && GCPhys >= pRam->GCPhys)
2691 {
2692# ifndef PGM_WITH_A20
2693 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2694# endif
2695 do
2696 {
2697 /* Make shadow PTE. */
2698# ifdef PGM_WITH_A20
2699 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2700# else
2701 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2702# endif
2703 SHWPTE PteDst;
2704
2705# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2706 /* Try to make the page writable if necessary. */
2707 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2708 && ( PGM_PAGE_IS_ZERO(pPage)
2709 || ( SHW_PTE_IS_RW(PteDstBase)
2710 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2711# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2712 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2713# endif
2714# ifdef VBOX_WITH_PAGE_SHARING
2715 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2716# endif
2717 && !PGM_PAGE_IS_BALLOONED(pPage))
2718 )
2719 )
2720 {
2721 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2722 AssertRCReturn(rc, rc);
2723 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2724 break;
2725 }
2726# endif
2727
2728 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2729 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2730 else if (PGM_PAGE_IS_BALLOONED(pPage))
2731 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2732 else
2733 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2734
2735 /* Only map writable pages writable. */
2736 if ( SHW_PTE_IS_P_RW(PteDst)
2737 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2738 {
2739 /* Still applies to shared pages. */
2740 Assert(!PGM_PAGE_IS_ZERO(pPage));
2741 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2742 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2743 }
2744
2745 if (SHW_PTE_IS_P(PteDst))
2746 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2747
2748 /* commit it (not atomic, new table) */
2749 pPTDst->a[iPTDst] = PteDst;
2750 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2751 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2752 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2753
2754 /* advance */
2755 GCPhys += PAGE_SIZE;
2756 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2757# ifndef PGM_WITH_A20
2758 iHCPage++;
2759# endif
2760 iPTDst++;
2761 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2762 && GCPhys <= pRam->GCPhysLast);
2763
2764 /* Advance ram range list. */
2765 while (pRam && GCPhys > pRam->GCPhysLast)
2766 pRam = pRam->CTX_SUFF(pNext);
2767 }
2768 else if (pRam)
2769 {
2770 Log(("Invalid pages at %RGp\n", GCPhys));
2771 do
2772 {
2773 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2774 GCPhys += PAGE_SIZE;
2775 iPTDst++;
2776 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2777 && GCPhys < pRam->GCPhys);
2778 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2779 }
2780 else
2781 {
2782 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2783 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2784 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2785 }
2786 } /* while more PTEs */
2787 } /* 4KB / 4MB */
2788 }
2789 else
2790 AssertRelease(!SHW_PDE_IS_P(PdeDst));
2791
2792 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2793 if (RT_FAILURE(rc))
2794 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
2795 return rc;
2796
2797#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2798 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2799 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2800 && PGM_SHW_TYPE != PGM_TYPE_NONE
2801 NOREF(iPDSrc); NOREF(pPDSrc);
2802
2803 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2804
2805 /*
2806 * Validate input a little bit.
2807 */
2808 int rc = VINF_SUCCESS;
2809# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2810 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2811 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2812
2813 /* Fetch the pgm pool shadow descriptor. */
2814 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2815 Assert(pShwPde);
2816
2817# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2818 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2819 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2820 PX86PDPAE pPDDst;
2821 PSHWPDE pPdeDst;
2822
2823 /* Fetch the pgm pool shadow descriptor. */
2824 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2825 AssertRCSuccessReturn(rc, rc);
2826 Assert(pShwPde);
2827
2828 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2829 pPdeDst = &pPDDst->a[iPDDst];
2830
2831# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2832 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2833 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2834 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2835 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2836 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2837 AssertRCSuccessReturn(rc, rc);
2838 Assert(pPDDst);
2839 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2840
2841 /* Fetch the pgm pool shadow descriptor. */
2842 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2843 Assert(pShwPde);
2844
2845# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2846 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2847 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2848 PEPTPD pPDDst;
2849 PEPTPDPT pPdptDst;
2850
2851 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2852 if (rc != VINF_SUCCESS)
2853 {
2854 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2855 AssertRC(rc);
2856 return rc;
2857 }
2858 Assert(pPDDst);
2859 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2860
2861 /* Fetch the pgm pool shadow descriptor. */
2862 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
2863 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2864 Assert(pShwPde);
2865# endif
2866 SHWPDE PdeDst = *pPdeDst;
2867
2868 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2869
2870# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2871 if ( BTH_IS_NP_ACTIVE(pVM)
2872 && !VM_IS_NEM_ENABLED(pVM)) /** @todo NEM: Large page support. */
2873 {
2874 /* Check if we allocated a big page before for this 2 MB range. */
2875 PPGMPAGE pPage;
2876 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
2877 if (RT_SUCCESS(rc))
2878 {
2879 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2880 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
2881 {
2882 if (PGM_A20_IS_ENABLED(pVCpu))
2883 {
2884 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
2885 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2886 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2887 }
2888 else
2889 {
2890 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
2891 pVM->pgm.s.cLargePagesDisabled++;
2892 }
2893 }
2894 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
2895 && PGM_A20_IS_ENABLED(pVCpu))
2896 {
2897 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
2898 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
2899 if (RT_SUCCESS(rc))
2900 {
2901 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2902 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2903 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2904 }
2905 }
2906 else if ( PGMIsUsingLargePages(pVM)
2907 && PGM_A20_IS_ENABLED(pVCpu))
2908 {
2909 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
2910 if (RT_SUCCESS(rc))
2911 {
2912 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2913 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2914 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2915 }
2916 else
2917 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
2918 }
2919
2920 if (HCPhys != NIL_RTHCPHYS)
2921 {
2922# if PGM_SHW_TYPE == PGM_TYPE_EPT
2923 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_MEMTYPE_WB
2924 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
2925# else
2926 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
2927 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
2928# endif
2929 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2930
2931 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
2932 /* Add a reference to the first page only. */
2933 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
2934
2935 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2936 return VINF_SUCCESS;
2937 }
2938 }
2939 }
2940# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
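    /* Overview of the large page path above: it only applies when nested paging
     * is active and NEM is not used.  The PGMPAGE for the 2 MB aligned guest
     * address is looked up and, depending on its recorded PDE type, an existing
     * large page is reused, a previously disabled one is re-validated, or a new
     * one is requested via pgmPhysAllocLargePage.  On success a leaf PDE is
     * written atomically, a single tracking reference is added for the first
     * page, and SyncPT completes without building a shadow page table at all. */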
2941
2942 /*
2943 * Allocate & map the page table.
2944 */
2945 PSHWPT pPTDst;
2946 PPGMPOOLPAGE pShwPage;
2947 RTGCPHYS GCPhys;
2948
2949 /* Virtual address = physical address */
2950 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
2951 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
2952 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
2953 &pShwPage);
2954 if ( rc == VINF_SUCCESS
2955 || rc == VINF_PGM_CACHED_PAGE)
2956 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2957 else
2958 {
2959 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2960 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2961 }
2962
2963 if (rc == VINF_SUCCESS)
2964 {
2965 /* New page table; fully set it up. */
2966 Assert(pPTDst);
2967
2968 /* Mask away the page offset. */
2969 GCPtrPage &= ~(RTGCPTR)PAGE_OFFSET_MASK;
2970
2971 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2972 {
2973 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2974 | (iPTDst << PAGE_SHIFT));
2975
2976 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2977 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2978 GCPtrCurPage,
2979 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2980 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2981
2982 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2983 break;
2984 }
2985 }
2986 else
2987 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
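    /* Whether the table was filled above or came back from the pool cache, every
     * entry identity-maps its slot: PTE iPTDst covers the guest-virtual page at
     * GCPtrPage + iPTDst * PAGE_SIZE and points at the guest-physical address of
     * the same value (no guest paging in this mode, so virtual == physical apart
     * from the A20 masking done by PGM_A20_APPLY). */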
2988
2989 /* Save the new PDE. */
2990# if PGM_SHW_TYPE == PGM_TYPE_EPT
2991 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
2992 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
2993# else
2994 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
2995 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
2996# endif
2997 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
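    /* The PDE is published with SHW_PDE_ATOMIC_SET2 rather than a plain store,
     * presumably so that neither another EMT nor a hardware table walk (in the
     * nested paging case) can ever observe a half written entry. */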
2998
2999 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3000 if (RT_FAILURE(rc))
3001 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3002 return rc;
3003
3004#else
3005 NOREF(iPDSrc); NOREF(pPDSrc);
3006 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3007 return VERR_PGM_NOT_USED_IN_MODE;
3008#endif
3009}
3010
3011
3012
3013/**
3014 * Prefetch a page/set of pages.
3015 *
3016 * Typically used to sync commonly used pages before entering raw mode
3017 * after a CR3 reload.
3018 *
3019 * @returns VBox status code.
3020 * @param pVCpu The cross context virtual CPU structure.
3021 * @param GCPtrPage Page to invalidate.
3022 */
3023PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
3024{
3025#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3026 || PGM_GST_TYPE == PGM_TYPE_REAL \
3027 || PGM_GST_TYPE == PGM_TYPE_PROT \
3028 || PGM_GST_TYPE == PGM_TYPE_PAE \
3029 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3030 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3031 && PGM_SHW_TYPE != PGM_TYPE_NONE
3032 /*
3033 * Check that all Guest levels thru the PDE are present, getting the
3034      * PD and PDE in the process.
3035 */
3036 int rc = VINF_SUCCESS;
3037# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3038# if PGM_GST_TYPE == PGM_TYPE_32BIT
3039 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3040 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3041# elif PGM_GST_TYPE == PGM_TYPE_PAE
3042 unsigned iPDSrc;
3043 X86PDPE PdpeSrc;
3044 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3045 if (!pPDSrc)
3046 return VINF_SUCCESS; /* not present */
3047# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3048 unsigned iPDSrc;
3049 PX86PML4E pPml4eSrc;
3050 X86PDPE PdpeSrc;
3051 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3052 if (!pPDSrc)
3053 return VINF_SUCCESS; /* not present */
3054# endif
3055 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3056# else
3057 PGSTPD pPDSrc = NULL;
3058 const unsigned iPDSrc = 0;
3059 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3060# endif
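    /* Guests without paging (real and protected mode) have no PDE to read, so a
     * fully permissive one (P, RW, US and A set) is synthesised above; as the
     * comment says, this is only so the common code below can treat every guest
     * mode the same way instead of growing more #ifdefs. */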
3061
3062 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
3063 {
3064 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3065 PGM_LOCK_VOID(pVM);
3066
3067# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3068 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3069# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3070 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3071 PX86PDPAE pPDDst;
3072 X86PDEPAE PdeDst;
3073# if PGM_GST_TYPE != PGM_TYPE_PAE
3074 X86PDPE PdpeSrc;
3075
3076 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3077 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3078# endif
3079 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3080 if (rc != VINF_SUCCESS)
3081 {
3082 PGM_UNLOCK(pVM);
3083 AssertRC(rc);
3084 return rc;
3085 }
3086 Assert(pPDDst);
3087 PdeDst = pPDDst->a[iPDDst];
3088
3089# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3090 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3091 PX86PDPAE pPDDst;
3092 X86PDEPAE PdeDst;
3093
3094# if PGM_GST_TYPE == PGM_TYPE_PROT
3095 /* AMD-V nested paging */
3096 X86PML4E Pml4eSrc;
3097 X86PDPE PdpeSrc;
3098 PX86PML4E pPml4eSrc = &Pml4eSrc;
3099
3100 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3101 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3102 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3103# endif
3104
3105 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3106 if (rc != VINF_SUCCESS)
3107 {
3108 PGM_UNLOCK(pVM);
3109 AssertRC(rc);
3110 return rc;
3111 }
3112 Assert(pPDDst);
3113 PdeDst = pPDDst->a[iPDDst];
3114# endif
3115 if (!(PdeDst.u & X86_PDE_P))
3116 {
3117 /** @todo r=bird: This guy will set the A bit on the PDE,
3118 * probably harmless. */
3119 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3120 }
3121 else
3122 {
3123 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3124 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3125 * makes no sense to prefetch more than one page.
3126 */
3127 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3128 if (RT_SUCCESS(rc))
3129 rc = VINF_SUCCESS;
3130 }
3131 PGM_UNLOCK(pVM);
3132 }
3133 return rc;
3134
3135#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3136 NOREF(pVCpu); NOREF(GCPtrPage);
3137 return VINF_SUCCESS; /* ignore */
3138#else
3139 AssertCompile(0);
3140#endif
3141}
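#if 0
/* Illustrative fragment, never compiled: a hypothetical direct call into this
 * template instance to warm a single mapping after a CR3 reload.  Real callers
 * go through PGM's per-mode dispatch rather than naming the instance, so treat
 * this strictly as a sketch of the contract, not the actual calling convention.
 */
int rcPrefetch = PGM_BTH_NAME(PrefetchPage)(pVCpu, GCPtrPage);
AssertRC(rcPrefetch); /* VINF_SUCCESS is the expected outcome; prefetching is best effort. */
#endif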
3142
3143
3144
3145
3146/**
3147 * Syncs a page during a PGMVerifyAccess() call.
3148 *
3149 * @returns VBox status code (informational included).
3150 * @param pVCpu The cross context virtual CPU structure.
3151 * @param GCPtrPage The address of the page to sync.
3152 * @param fPage The effective guest page flags.
3153 * @param uErr The trap error code.
3154 * @remarks This will normally never be called on invalid guest page
3155 * translation entries.
3156 */
3157PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3158{
3159 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3160
3161 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3162 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
3163
3164 Assert(!pVM->pgm.s.fNestedPaging);
3165#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3166 || PGM_GST_TYPE == PGM_TYPE_REAL \
3167 || PGM_GST_TYPE == PGM_TYPE_PROT \
3168 || PGM_GST_TYPE == PGM_TYPE_PAE \
3169 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3170 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3171 && PGM_SHW_TYPE != PGM_TYPE_NONE
3172
3173 /*
3174 * Get guest PD and index.
3175 */
3176    /** @todo Performance: We did all this just a jiffy ago in the
3177 * PGMGstGetPage call. */
3178# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3179# if PGM_GST_TYPE == PGM_TYPE_32BIT
3180 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3181 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3182
3183# elif PGM_GST_TYPE == PGM_TYPE_PAE
3184 unsigned iPDSrc = 0;
3185 X86PDPE PdpeSrc;
3186 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3187 if (RT_UNLIKELY(!pPDSrc))
3188 {
3189 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3190 return VINF_EM_RAW_GUEST_TRAP;
3191 }
3192
3193# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3194 unsigned iPDSrc = 0; /* shut up gcc */
3195 PX86PML4E pPml4eSrc = NULL; /* ditto */
3196 X86PDPE PdpeSrc;
3197 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3198 if (RT_UNLIKELY(!pPDSrc))
3199 {
3200 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3201 return VINF_EM_RAW_GUEST_TRAP;
3202 }
3203# endif
3204
3205# else /* !PGM_WITH_PAGING */
3206 PGSTPD pPDSrc = NULL;
3207 const unsigned iPDSrc = 0;
3208# endif /* !PGM_WITH_PAGING */
3209 int rc = VINF_SUCCESS;
3210
3211 PGM_LOCK_VOID(pVM);
3212
3213 /*
3214 * First check if the shadow pd is present.
3215 */
3216# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3217 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3218
3219# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3220 PX86PDEPAE pPdeDst;
3221 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3222 PX86PDPAE pPDDst;
3223# if PGM_GST_TYPE != PGM_TYPE_PAE
3224 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3225 X86PDPE PdpeSrc;
3226 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3227# endif
3228 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3229 if (rc != VINF_SUCCESS)
3230 {
3231 PGM_UNLOCK(pVM);
3232 AssertRC(rc);
3233 return rc;
3234 }
3235 Assert(pPDDst);
3236 pPdeDst = &pPDDst->a[iPDDst];
3237
3238# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3239 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3240 PX86PDPAE pPDDst;
3241 PX86PDEPAE pPdeDst;
3242
3243# if PGM_GST_TYPE == PGM_TYPE_PROT
3244 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3245 X86PML4E Pml4eSrc;
3246 X86PDPE PdpeSrc;
3247 PX86PML4E pPml4eSrc = &Pml4eSrc;
3248 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3249 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3250# endif
3251
3252 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3253 if (rc != VINF_SUCCESS)
3254 {
3255 PGM_UNLOCK(pVM);
3256 AssertRC(rc);
3257 return rc;
3258 }
3259 Assert(pPDDst);
3260 pPdeDst = &pPDDst->a[iPDDst];
3261# endif
3262
3263 if (!(pPdeDst->u & X86_PDE_P))
3264 {
3265 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3266 if (rc != VINF_SUCCESS)
3267 {
3268 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3269 PGM_UNLOCK(pVM);
3270 AssertRC(rc);
3271 return rc;
3272 }
3273 }
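    /* From here on the order mirrors the fault path: first give the dirty-bit
     * emulation a chance to resolve the access (a write to a page that is only
     * write-monitored for dirty tracking), otherwise sync the single page, and
     * if even that fails report the access as a genuine guest trap. */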
3274
3275# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3276 /* Check for dirty bit fault */
3277 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3278 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3279 Log(("PGMVerifyAccess: success (dirty)\n"));
3280 else
3281# endif
3282 {
3283# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3284 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3285# else
3286 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3287# endif
3288
3289 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3290 if (uErr & X86_TRAP_PF_US)
3291 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
3292 else /* supervisor */
3293 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3294
3295 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3296 if (RT_SUCCESS(rc))
3297 {
3298 /* Page was successfully synced */
3299 Log2(("PGMVerifyAccess: success (sync)\n"));
3300 rc = VINF_SUCCESS;
3301 }
3302 else
3303 {
3304 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3305 rc = VINF_EM_RAW_GUEST_TRAP;
3306 }
3307 }
3308 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3309 PGM_UNLOCK(pVM);
3310 return rc;
3311
3312#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3313
3314 AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3315 return VERR_PGM_NOT_USED_IN_MODE;
3316#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3317}
3318
3319
3320/**
3321 * Syncs the paging hierarchy starting at CR3.
3322 *
3323 * @returns VBox status code, R0/RC may return VINF_PGM_SYNC_CR3, no other
3324 * informational status codes.
3325 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
3326 * the VMM into guest context.
3327 * @param pVCpu The cross context virtual CPU structure.
3328 * @param cr0 Guest context CR0 register.
3329 * @param cr3 Guest context CR3 register. Not subjected to the A20
3330 * mask.
3331 * @param cr4 Guest context CR4 register.
3332 * @param fGlobal Including global page directories or not
3333 */
3334PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3335{
3336 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3337 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
3338
3339 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
3340
3341#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3342# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3343 PGM_LOCK_VOID(pVM);
3344 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3345 if (pPool->cDirtyPages)
3346 pgmPoolResetDirtyPages(pVM);
3347 PGM_UNLOCK(pVM);
3348# endif
3349#endif /* !NESTED && !EPT */
3350
3351#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3352 /*
3353 * Nested / EPT / None - No work.
3354 */
3355 return VINF_SUCCESS;
3356
3357#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3358 /*
3359 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3360 * out the shadow parts when the guest modifies its tables.
3361 */
3362 return VINF_SUCCESS;
3363
3364#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3365
3366 return VINF_SUCCESS;
3367#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3368}
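/* Note: as the three branches above show, SyncCR3 currently returns VINF_SUCCESS
 * for every shadow mode; the only remaining work in this function is the
 * optional dirty-page reset guarded by PGMPOOL_WITH_OPTIMIZED_DIRTY_PT at the
 * top. */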
3369
3370
3371
3372
3373#ifdef VBOX_STRICT
3374
3375/**
3376 * Checks that the shadow page table is in sync with the guest one.
3377 *
3378 * @returns The number of errors.
3379 * @param pVCpu The cross context virtual CPU structure.
3380 * @param cr3 Guest context CR3 register.
3381 * @param cr4 Guest context CR4 register.
3382 * @param GCPtr Where to start. Defaults to 0.
3383 * @param cb How much to check. Defaults to everything.
3384 */
3385PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3386{
3387 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
3388#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3389 return 0;
3390#else
3391 unsigned cErrors = 0;
3392 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3393 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3394
3395# if PGM_GST_TYPE == PGM_TYPE_PAE
3396 /** @todo currently broken; crashes below somewhere */
3397 AssertFailed();
3398# endif
3399
3400# if PGM_GST_TYPE == PGM_TYPE_32BIT \
3401 || PGM_GST_TYPE == PGM_TYPE_PAE \
3402 || PGM_GST_TYPE == PGM_TYPE_AMD64
3403
3404 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3405 PPGMCPU pPGM = &pVCpu->pgm.s;
3406 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3407 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3408# ifndef IN_RING0
3409 RTHCPHYS HCPhys; /* general usage. */
3410# endif
3411 int rc;
3412
3413 /*
3414 * Check that the Guest CR3 and all its mappings are correct.
3415 */
3416 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
3417 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3418 false);
3419# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3420# if 0
3421# if PGM_GST_TYPE == PGM_TYPE_32BIT
3422 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3423# else
3424 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3425# endif
3426 AssertRCReturn(rc, 1);
3427 HCPhys = NIL_RTHCPHYS;
3428 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
3429 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3430# endif
3431# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3432 pgmGstGet32bitPDPtr(pVCpu);
3433 RTGCPHYS GCPhys;
3434 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
3435 AssertRCReturn(rc, 1);
3436 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3437# endif
3438# endif /* !IN_RING0 */
3439
3440 /*
3441 * Get and check the Shadow CR3.
3442 */
3443# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3444 unsigned cPDEs = X86_PG_ENTRIES;
3445 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3446# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3447# if PGM_GST_TYPE == PGM_TYPE_32BIT
3448 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3449# else
3450 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3451# endif
3452 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3453# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3454 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3455 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3456# endif
3457 if (cb != ~(RTGCPTR)0)
3458 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3459
3460/** @todo call the other two PGMAssert*() functions. */
3461
3462# if PGM_GST_TYPE == PGM_TYPE_AMD64
3463 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
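    /* For reference, the standard long-mode 4-level split: bits 47:39 select the
     * PML4 entry, 38:30 the PDPT entry, 29:21 the PD entry and 20:12 the page
     * table entry.  Each PML4E therefore spans 512 * 512 * 2 MB = 512 GB, which
     * is exactly the amount skipped below whenever a PML4E is absent or found to
     * be out of sync. */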
3464
3465 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3466 {
3467 PPGMPOOLPAGE pShwPdpt = NULL;
3468 PX86PML4E pPml4eSrc;
3469 PX86PML4E pPml4eDst;
3470 RTGCPHYS GCPhysPdptSrc;
3471
3472 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3473 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3474
3475 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3476 if (!(pPml4eDst->u & X86_PML4E_P))
3477 {
3478 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3479 continue;
3480 }
3481
3482 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3483 GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
3484
3485 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
3486 {
3487 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3488 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3489 cErrors++;
3490 continue;
3491 }
3492
3493 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3494 {
3495 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3496 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3497 cErrors++;
3498 continue;
3499 }
3500
3501 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
3502 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
3503 {
3504 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3505 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3506 cErrors++;
3507 continue;
3508 }
3509# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3510 {
3511# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3512
3513# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3514 /*
3515 * Check the PDPTEs too.
3516 */
3517 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3518
3519 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3520 {
3521 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3522 PPGMPOOLPAGE pShwPde = NULL;
3523 PX86PDPE pPdpeDst;
3524 RTGCPHYS GCPhysPdeSrc;
3525 X86PDPE PdpeSrc;
3526 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3527# if PGM_GST_TYPE == PGM_TYPE_PAE
3528 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3529 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3530# else
3531 PX86PML4E pPml4eSrcIgn;
3532 PX86PDPT pPdptDst;
3533 PX86PDPAE pPDDst;
3534 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3535
3536 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3537 if (rc != VINF_SUCCESS)
3538 {
3539 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3540 GCPtr += 512 * _2M;
3541 continue; /* next PDPTE */
3542 }
3543 Assert(pPDDst);
3544# endif
3545 Assert(iPDSrc == 0);
3546
3547 pPdpeDst = &pPdptDst->a[iPdpt];
3548
3549 if (!(pPdpeDst->u & X86_PDPE_P))
3550 {
3551 GCPtr += 512 * _2M;
3552 continue; /* next PDPTE */
3553 }
3554
3555 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3556 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
3557
3558 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
3559 {
3560 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3561 GCPtr += 512 * _2M;
3562 cErrors++;
3563 continue;
3564 }
3565
3566 if (GCPhysPdeSrc != pShwPde->GCPhys)
3567 {
3568# if PGM_GST_TYPE == PGM_TYPE_AMD64
3569 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3570# else
3571 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3572# endif
3573 GCPtr += 512 * _2M;
3574 cErrors++;
3575 continue;
3576 }
3577
3578# if PGM_GST_TYPE == PGM_TYPE_AMD64
3579 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
3580 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
3581 {
3582 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3583 GCPtr += 512 * _2M;
3584 cErrors++;
3585 continue;
3586 }
3587# endif
3588
3589# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3590 {
3591# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3592# if PGM_GST_TYPE == PGM_TYPE_32BIT
3593 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3594# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3595 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3596# endif
3597# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3598 /*
3599 * Iterate the shadow page directory.
3600 */
3601 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3602 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3603
3604 for (;
3605 iPDDst < cPDEs;
3606 iPDDst++, GCPtr += cIncrement)
3607 {
3608# if PGM_SHW_TYPE == PGM_TYPE_PAE
3609 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3610# else
3611 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3612# endif
3613 if ( (PdeDst.u & X86_PDE_P)
3614 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
3615 {
3616 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3617 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3618 if (!pPoolPage)
3619 {
3620 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3621 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3622 cErrors++;
3623 continue;
3624 }
3625 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3626
3627 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3628 {
3629 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3630 GCPtr, (uint64_t)PdeDst.u));
3631 cErrors++;
3632 }
3633
3634 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3635 {
3636 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3637 GCPtr, (uint64_t)PdeDst.u));
3638 cErrors++;
3639 }
3640
3641 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3642 if (!(PdeSrc.u & X86_PDE_P))
3643 {
3644 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3645 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3646 cErrors++;
3647 continue;
3648 }
3649
3650 if ( !(PdeSrc.u & X86_PDE_PS)
3651 || !fBigPagesSupported)
3652 {
3653 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3654# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3655 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (PAGE_SIZE / 2)));
3656# endif
3657 }
3658 else
3659 {
3660# if PGM_GST_TYPE == PGM_TYPE_32BIT
3661 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3662 {
3663 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3664 GCPtr, (uint64_t)PdeSrc.u));
3665 cErrors++;
3666 continue;
3667 }
3668# endif
3669 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3670# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3671 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
3672# endif
3673 }
3674
3675 if ( pPoolPage->enmKind
3676 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3677 {
3678 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3679 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3680 cErrors++;
3681 }
3682
3683 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3684 if (!pPhysPage)
3685 {
3686 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3687 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3688 cErrors++;
3689 continue;
3690 }
3691
3692 if (GCPhysGst != pPoolPage->GCPhys)
3693 {
3694 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3695 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3696 cErrors++;
3697 continue;
3698 }
3699
3700 if ( !(PdeSrc.u & X86_PDE_PS)
3701 || !fBigPagesSupported)
3702 {
3703 /*
3704 * Page Table.
3705 */
3706 const GSTPT *pPTSrc;
3707 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1)),
3708 &pPTSrc);
3709 if (RT_FAILURE(rc))
3710 {
3711 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3712 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3713 cErrors++;
3714 continue;
3715 }
3716 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3717 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3718 {
3719 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3720 // (This problem will go away when/if we shadow multiple CR3s.)
3721 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3722 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3723 cErrors++;
3724 continue;
3725 }
3726 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3727 {
3728 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3729 GCPtr, (uint64_t)PdeDst.u));
3730 cErrors++;
3731 continue;
3732 }
3733
3734 /* iterate the page table. */
3735# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3736 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3737 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3738# else
3739 const unsigned offPTSrc = 0;
3740# endif
3741 for (unsigned iPT = 0, off = 0;
3742 iPT < RT_ELEMENTS(pPTDst->a);
3743 iPT++, off += PAGE_SIZE)
3744 {
3745 const SHWPTE PteDst = pPTDst->a[iPT];
3746
3747 /* skip not-present and dirty tracked entries. */
3748 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3749 continue;
3750 Assert(SHW_PTE_IS_P(PteDst));
3751
3752 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3753 if (!(PteSrc.u & X86_PTE_P))
3754 {
3755# ifdef IN_RING3
3756 PGMAssertHandlerAndFlagsInSync(pVM);
3757 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
3758 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
3759 0, 0, UINT64_MAX, 99, NULL);
3760# endif
3761 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3762 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3763 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
3764 cErrors++;
3765 continue;
3766 }
3767
3768 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3769# if 1 /** @todo sync accessed bit properly... */
3770 fIgnoreFlags |= X86_PTE_A;
3771# endif
3772
3773 /* match the physical addresses */
3774 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3775 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
3776
3777# ifdef IN_RING3
3778 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3779 if (RT_FAILURE(rc))
3780 {
3781 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3782 {
3783 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3784 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3785 cErrors++;
3786 continue;
3787 }
3788 }
3789 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3790 {
3791 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3792 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3793 cErrors++;
3794 continue;
3795 }
3796# endif
3797
3798 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3799 if (!pPhysPage)
3800 {
3801# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3802 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3803 {
3804 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3805 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3806 cErrors++;
3807 continue;
3808 }
3809# endif
3810 if (SHW_PTE_IS_RW(PteDst))
3811 {
3812 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3813 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3814 cErrors++;
3815 }
3816 fIgnoreFlags |= X86_PTE_RW;
3817 }
3818 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3819 {
3820 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3821 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3822 cErrors++;
3823 continue;
3824 }
3825
3826 /* flags */
3827 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3828 {
3829 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3830 {
3831 if (SHW_PTE_IS_RW(PteDst))
3832 {
3833 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3834 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3835 cErrors++;
3836 continue;
3837 }
3838 fIgnoreFlags |= X86_PTE_RW;
3839 }
3840 else
3841 {
3842 if ( SHW_PTE_IS_P(PteDst)
3843# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3844 && !PGM_PAGE_IS_MMIO(pPhysPage)
3845# endif
3846 )
3847 {
3848 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3849 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3850 cErrors++;
3851 continue;
3852 }
3853 fIgnoreFlags |= X86_PTE_P;
3854 }
3855 }
3856 else
3857 {
3858 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
3859 {
3860 if (SHW_PTE_IS_RW(PteDst))
3861 {
3862 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3863 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3864 cErrors++;
3865 continue;
3866 }
3867 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
3868 {
3869 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3870 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3871 cErrors++;
3872 continue;
3873 }
3874 if (SHW_PTE_IS_D(PteDst))
3875 {
3876 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3877 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3878 cErrors++;
3879 }
3880# if 0 /** @todo sync access bit properly... */
3881 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3882 {
3883                            AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3884 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3885 cErrors++;
3886 }
3887 fIgnoreFlags |= X86_PTE_RW;
3888# else
3889 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3890# endif
3891 }
3892 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
3893 {
3894 /* access bit emulation (not implemented). */
3895 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
3896 {
3897 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3898 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3899 cErrors++;
3900 continue;
3901 }
3902 if (!SHW_PTE_IS_A(PteDst))
3903 {
3904                            AssertMsgFailed(("!ACCESSED page at %RGv: the shadow PTE does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3905 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3906 cErrors++;
3907 }
3908 fIgnoreFlags |= X86_PTE_P;
3909 }
3910# ifdef DEBUG_sandervl
3911 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3912# endif
3913 }
3914
3915 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
3916 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
3917 )
3918 {
3919 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3920 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
3921 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3922 cErrors++;
3923 continue;
3924 }
3925 } /* foreach PTE */
3926 }
3927 else
3928 {
3929 /*
3930 * Big Page.
3931 */
3932 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3933 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
3934 {
3935 if (PdeDst.u & X86_PDE_RW)
3936 {
3937 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3938 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3939 cErrors++;
3940 continue;
3941 }
3942 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3943 {
3944                        AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3945 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3946 cErrors++;
3947 continue;
3948 }
3949# if 0 /** @todo sync access bit properly... */
3950 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3951 {
3952                        AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3953 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3954 cErrors++;
3955 }
3956 fIgnoreFlags |= X86_PTE_RW;
3957# else
3958 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3959# endif
3960 }
3961 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3962 {
3963 /* access bit emulation (not implemented). */
3964 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
3965 {
3966 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3967 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3968 cErrors++;
3969 continue;
3970 }
3971 if (!SHW_PDE_IS_A(PdeDst))
3972 {
3973                            AssertMsgFailed(("!ACCESSED page at %RGv: the shadow PDE does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3974 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3975 cErrors++;
3976 }
3977 fIgnoreFlags |= X86_PTE_P;
3978 }
3979
3980 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3981 {
3982 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3983 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3984 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3985 cErrors++;
3986 }
3987
3988 /* iterate the page table. */
3989 for (unsigned iPT = 0, off = 0;
3990 iPT < RT_ELEMENTS(pPTDst->a);
3991 iPT++, off += PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + PAGE_SIZE))
3992 {
3993 const SHWPTE PteDst = pPTDst->a[iPT];
3994
3995 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
3996 {
3997 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3998 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
3999 cErrors++;
4000 }
4001
4002 /* skip not-present entries. */
4003 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4004 continue;
4005
4006 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4007
4008 /* match the physical addresses */
4009 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4010
4011# ifdef IN_RING3
4012 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4013 if (RT_FAILURE(rc))
4014 {
4015 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4016 {
4017 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4018 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4019 cErrors++;
4020 }
4021 }
4022 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4023 {
4024 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4025 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4026 cErrors++;
4027 continue;
4028 }
4029# endif
4030 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4031 if (!pPhysPage)
4032 {
4033# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4034 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4035 {
4036 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4037 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4038 cErrors++;
4039 continue;
4040 }
4041# endif
4042 if (SHW_PTE_IS_RW(PteDst))
4043 {
4044 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4045 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4046 cErrors++;
4047 }
4048 fIgnoreFlags |= X86_PTE_RW;
4049 }
4050 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4051 {
4052 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4053 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4054 cErrors++;
4055 continue;
4056 }
4057
4058 /* flags */
4059 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4060 {
4061 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4062 {
4063 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4064 {
4065 if (SHW_PTE_IS_RW(PteDst))
4066 {
4067 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4068 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4069 cErrors++;
4070 continue;
4071 }
4072 fIgnoreFlags |= X86_PTE_RW;
4073 }
4074 }
4075 else
4076 {
4077 if ( SHW_PTE_IS_P(PteDst)
4078# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4079 && !PGM_PAGE_IS_MMIO(pPhysPage)
4080# endif
4081 )
4082 {
4083 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4084 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4085 cErrors++;
4086 continue;
4087 }
4088 fIgnoreFlags |= X86_PTE_P;
4089 }
4090 }
4091
4092 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4093 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4094 )
4095 {
4096 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4097 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4098 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4099 cErrors++;
4100 continue;
4101 }
4102 } /* for each PTE */
4103 }
4104 }
4105 /* not present */
4106
4107 } /* for each PDE */
4108
4109 } /* for each PDPTE */
4110
4111 } /* for each PML4E */
4112
4113# ifdef DEBUG
4114 if (cErrors)
4115 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4116# endif
4117# endif /* GST is in {32BIT, PAE, AMD64} */
4118 return cErrors;
4119#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
4120}
4121#endif /* VBOX_STRICT */
4122
4123
4124/**
4125 * Sets up the CR3 for shadow paging
4126 *
4127 * @returns Strict VBox status code.
4128 * @retval VINF_SUCCESS.
4129 *
4130 * @param pVCpu The cross context virtual CPU structure.
4131 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
4132 * already applied.)
4133 */
4134PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
4135{
4136 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4137 int rc = VINF_SUCCESS;
4138
4139 /* Update guest paging info. */
4140#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4141 || PGM_GST_TYPE == PGM_TYPE_PAE \
4142 || PGM_GST_TYPE == PGM_TYPE_AMD64
4143
4144 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4145 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4146
4147# if PGM_GST_TYPE == PGM_TYPE_PAE
4148 if (!pVCpu->pgm.s.CTX_SUFF(fPaePdpesAndCr3Mapped))
4149# endif
4150 {
4151 /*
4152 * Map the page CR3 points at.
4153 */
4154 RTHCPTR HCPtrGuestCR3;
4155 rc = pgmGstMapCr3(pVCpu, GCPhysCR3, &HCPtrGuestCR3);
4156 if (RT_SUCCESS(rc))
4157 {
4158# if PGM_GST_TYPE == PGM_TYPE_32BIT
4159# ifdef IN_RING3
4160 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
4161 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
4162# else
4163 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
4164 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
4165# endif
4166
4167# elif PGM_GST_TYPE == PGM_TYPE_PAE
4168# ifdef IN_RING3
4169 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
4170 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
4171# else
4172 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
4173 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
4174# endif
4175
4176 /*
4177 * Update CPUM and map the 4 PDs too.
4178 */
4179 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
4180 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
4181 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
4182 PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
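            /* PAE semantics: the CPU snapshots the four PDPTEs at the time CR3 is
             * loaded, so they are read here from the freshly mapped CR3 page and
             * handed both to CPUM and to PGM, which in turn maps the four page
             * directories they point at. */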
4183
4184# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4185# ifdef IN_RING3
4186 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
4187 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
4188# else
4189 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
4190 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
4191# endif
4192# endif
4193 }
4194 else
4195 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4196 }
4197
4198 /*
4199 * Reset fPaePdpesAndCr3Mapped for all modes as there's no guarantee that
4200 * we were called in the correct sequence of PAE followed by other modes
4201 * without CR3 changing in between.
4202 */
4203 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4204 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4205#endif
4206
4207 /*
4208 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
4209 */
4210# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4211 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4212 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4213 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4214 && PGM_GST_TYPE != PGM_TYPE_PROT))
4215
4216 Assert(!pVM->pgm.s.fNestedPaging);
4217 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4218
4219 /*
4220 * Update the shadow root page as well since that's not fixed.
4221 */
4222 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4223 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4224 PPGMPOOLPAGE pNewShwPageCR3;
4225
4226 PGM_LOCK_VOID(pVM);
4227
4228# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4229 if (pPool->cDirtyPages)
4230 pgmPoolResetDirtyPages(pVM);
4231# endif
4232
4233 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4234 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
4235 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
4236 AssertFatalRC(rc2);
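    /* pgmPoolAlloc either returns a cached root page already keyed to this guest
     * CR3 (the pool caches by GCPhys and kind) or hands out a fresh one; passing
     * fLockPage=true keeps the active root pinned so the pool cannot evict it,
     * which is why the old root below is explicitly unlocked before it is freed. */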
4237
4238 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4239# ifdef IN_RING0
4240 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4241# else
4242 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4243# endif
4244
4245 /* Set the current hypervisor CR3. */
4246 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4247
4248 /* Clean up the old CR3 root. */
4249 if ( pOldShwPageCR3
4250 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4251 {
4252 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4253
4254 /* Mark the page as unlocked; allow flushing again. */
4255 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4256
4257 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
4258 }
4259 PGM_UNLOCK(pVM);
4260# else
4261 NOREF(GCPhysCR3);
4262# endif
4263
4264 return rc;
4265}
4266
4267/**
4268 * Unmaps the shadow CR3.
4269 *
4270 * @returns VBox status, no specials.
4271 * @param pVCpu The cross context virtual CPU structure.
4272 */
4273PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
4274{
4275 LogFlow(("UnmapCR3\n"));
4276
4277 int rc = VINF_SUCCESS;
4278 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4279
4280 /*
4281 * Update guest paging info.
4282 */
4283#if PGM_GST_TYPE == PGM_TYPE_32BIT
4284 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4285 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4286
4287#elif PGM_GST_TYPE == PGM_TYPE_PAE
4288 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4289 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4290 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4291 {
4292 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4293 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4294 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4295 }
4296
4297#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4298 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4299 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4300
4301#else /* prot/real mode stub */
4302 /* nothing to do */
4303#endif
4304
4305 /*
4306 * Update second-level address translation info.
4307 */
4308#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4309 pVCpu->pgm.s.pGstEptPml4R3 = 0;
4310 pVCpu->pgm.s.pGstEptPml4R0 = 0;
4311#endif
4312
4313 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4314 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4315
4316 /*
4317 * Update shadow paging info.
4318 */
4319#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4320 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4321 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4322# if PGM_GST_TYPE != PGM_TYPE_REAL
4323 Assert(!pVM->pgm.s.fNestedPaging);
4324# endif
4325 PGM_LOCK_VOID(pVM);
4326
4327 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4328 {
4329 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4330
4331# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4332 if (pPool->cDirtyPages)
4333 pgmPoolResetDirtyPages(pVM);
4334# endif
4335
4336 /* Mark the page as unlocked; allow flushing again. */
4337 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4338
4339 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
4340 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4341 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4342 }
4343
4344 PGM_UNLOCK(pVM);
4345#endif
4346
4347 return rc;
4348}
4349