VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 93582

Last change on this file since 93582 was 93554, checked in by vboxsync, 3 years ago

VMM: Changed PAGE_SIZE -> GUEST_PAGE_SIZE / HOST_PAGE_SIZE, PAGE_SHIFT -> GUEST_PAGE_SHIFT / HOST_PAGE_SHIFT, and PAGE_OFFSET_MASK -> GUEST_PAGE_OFFSET_MASK / HOST_PAGE_OFFSET_MASK. Also removed most usage of ASMMemIsZeroPage and ASMMemZeroPage since the host and guest page size doesn't need to be the same any more. Some work left to do in the page pool code. bugref:9898

File size: 189.3 KB
1/* $Id: PGMAllBth.h 93554 2022-02-02 22:57:02Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
12
13/*
14 * Copyright (C) 2006-2022 Oracle Corporation
15 *
16 * This file is part of VirtualBox Open Source Edition (OSE), as
17 * available from http://www.virtualbox.org. This file is free software;
18 * you can redistribute it and/or modify it under the terms of the GNU
19 * General Public License (GPL) as published by the Free Software
20 * Foundation, in version 2 as it comes in the "COPYING" file of the
21 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
22 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
23 */
24
25#ifdef _MSC_VER
26/** @todo we're generating unnecessary code in nested/ept shadow mode and for
27 * real/prot-guest+RC mode. */
28# pragma warning(disable: 4505)
29#endif
30
31
32/*********************************************************************************************************************************
33* Internal Functions *
34*********************************************************************************************************************************/
35RT_C_DECLS_BEGIN
36PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
37#ifndef IN_RING3
38PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
39#endif
40PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
41static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
42static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
43static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
44#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
45static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
46#else
47static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
48#endif
49PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
50PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
51PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
52#ifdef VBOX_STRICT
53PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
54#endif
55PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
56PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
57
58#ifdef IN_RING3
59PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
60#endif
61RT_C_DECLS_END
62
63
64
65
66/*
67 * Filter out some illegal combinations of guest and shadow paging, so we can
68 * remove redundant checks inside functions.
69 */
70#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
71 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
72# error "Invalid combination; PAE guest implies PAE shadow"
73#endif
74
75#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
76 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
77 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
78# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
79#endif
80
81#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
82 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
83 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
84# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
85#endif
86
87#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
88 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
89# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
90#endif
91
92
93/**
94 * Enters the shadow+guest mode.
95 *
96 * @returns VBox status code.
97 * @param pVCpu The cross context virtual CPU structure.
98 * @param GCPhysCR3 The physical address from the CR3 register.
99 */
100PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
101{
102 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches;
103 * Other modes rely on MapCR3/UnmapCR3 to set up the shadow root page tables.
104 */
105#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
106 || PGM_SHW_TYPE == PGM_TYPE_PAE \
107 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
108 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
109 || PGM_GST_TYPE == PGM_TYPE_PROT))
110
111 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
112
113 Assert((HMIsNestedPagingActive(pVM) || VM_IS_NEM_ENABLED(pVM)) == pVM->pgm.s.fNestedPaging);
114 Assert(!pVM->pgm.s.fNestedPaging);
115
116 PGM_LOCK_VOID(pVM);
117 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
118 * but any calls to GC need a proper shadow page setup as well.
119 */
120 /* Free the previous root mapping if still active. */
121 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
122 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
123 if (pOldShwPageCR3)
124 {
125 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
126
127 /* Mark the page as unlocked; allow flushing again. */
128 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
129
130 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
131 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
132 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
133 }
134
135 /* construct a fake address. */
136 GCPhysCR3 = RT_BIT_64(63);
137 PPGMPOOLPAGE pNewShwPageCR3;
138 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
139 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
140 &pNewShwPageCR3);
141 AssertRCReturn(rc, rc);
142
143 pVCpu->pgm.s.pShwPageCR3R3 = (R3PTRTYPE(PPGMPOOLPAGE))MMHyperCCToR3(pVM, pNewShwPageCR3);
144 pVCpu->pgm.s.pShwPageCR3R0 = (R0PTRTYPE(PPGMPOOLPAGE))MMHyperCCToR0(pVM, pNewShwPageCR3);
145
146 /* Mark the page as locked; disallow flushing. */
147 pgmPoolLockPage(pPool, pNewShwPageCR3);
148
149 /* Set the current hypervisor CR3. */
150 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
151
152 PGM_UNLOCK(pVM);
153 return rc;
154#else
155 NOREF(pVCpu); NOREF(GCPhysCR3);
156 return VINF_SUCCESS;
157#endif
158}
159
160
161#ifndef IN_RING3
162
163# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
164/**
165 * Deal with a guest page fault.
166 *
167 * @returns Strict VBox status code.
168 * @retval VINF_EM_RAW_GUEST_TRAP
169 * @retval VINF_EM_RAW_EMULATE_INSTR
170 *
171 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
172 * @param pWalk The guest page table walk result.
173 * @param uErr The error code.
174 */
175PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PPGMPTWALK pWalk, RTGCUINT uErr)
176{
177 /*
178 * Calc the error code for the guest trap.
179 */
180 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
181 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
182 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
183 if ( pWalk->fRsvdError
184 || pWalk->fBadPhysAddr)
185 {
186 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
187 Assert(!pWalk->fNotPresent);
188 }
189 else if (!pWalk->fNotPresent)
190 uNewErr |= X86_TRAP_PF_P;
191 TRPMSetErrorCode(pVCpu, uNewErr);
192
193 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pWalk->GCPtr, uErr, pWalk->uLevel));
194 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
195 return VINF_EM_RAW_GUEST_TRAP;
196}
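/* Editor's note (illustrative, not part of the original source): the masks above
 * follow the architectural x86 #PF error-code layout, which the X86_TRAP_PF_*
 * defines mirror bit for bit:
 *   bit 0  P    - 0 = not-present fault, 1 = protection violation
 *   bit 1  RW   - 1 = the faulting access was a write
 *   bit 2  US   - 1 = the access came from user mode
 *   bit 3  RSVD - 1 = a reserved bit was set in a paging structure
 *   bit 4  ID   - 1 = instruction fetch (only reported when NX paging is active)
 * which is why the ID bit is only forwarded to the guest when GST_IS_NX_ACTIVE()
 * is true. */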
197# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
198
199
200#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
201/**
202 * Deal with a guest page fault.
203 *
204 * The caller has taken the PGM lock.
205 *
206 * @returns Strict VBox status code.
207 *
208 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
209 * @param uErr The error code.
210 * @param pRegFrame The register frame.
211 * @param pvFault The fault address.
212 * @param pPage The guest page at @a pvFault.
213 * @param pWalk The guest page table walk result.
214 * @param pGstWalk The guest paging-mode specific walk information.
215 * @param pfLockTaken PGM lock taken here or not (out). This is true
216 * when we're called.
217 */
218static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
219 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
220# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
221 , PPGMPTWALK pWalk
222 , PGSTPTWALK pGstWalk
223# endif
224 )
225{
226# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
227 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
228# endif
229 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
230 VBOXSTRICTRC rcStrict;
231
232 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
233 {
234 /*
235 * Physical page access handler.
236 */
237# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
238 const RTGCPHYS GCPhysFault = pWalk->GCPhys;
239# else
240 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
241# endif
242 PPGMPHYSHANDLER pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
243 if (pCur)
244 {
245 PPGMPHYSHANDLERTYPEINT pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
246
247# ifdef PGM_SYNC_N_PAGES
248 /*
249 * If the region is write protected and we got a page not present fault, then sync
250 * the pages. If the fault was caused by a read, then restart the instruction.
251 * In case of write access continue to the GC write handler.
252 *
253 * ASSUMES that there is only one handler per page or that they have similar write properties.
254 */
255 if ( !(uErr & X86_TRAP_PF_P)
256 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
257 {
258# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
259 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
260# else
261 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
262# endif
263 if ( RT_FAILURE(rcStrict)
264 || !(uErr & X86_TRAP_PF_RW)
265 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
266 {
267 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
268 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
269 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
270 return rcStrict;
271 }
272 }
273# endif
274# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
275 /*
276 * If the access was not thru a #PF(RSVD|...) resync the page.
277 */
278 if ( !(uErr & X86_TRAP_PF_RSVD)
279 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
280# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
281 && (pWalk->fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK))
282 == PGM_PTATTRS_W_MASK /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
283# endif
284 )
285 {
286# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
287 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
288# else
289 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
290# endif
291 if ( RT_FAILURE(rcStrict)
292 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
293 {
294 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
295 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
296 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
297 return rcStrict;
298 }
299 }
300# endif
301
302 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
303 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
304 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
305 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
306 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
307 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
308 else
309 {
310 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
311 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
312 }
313
314 if (pCurType->CTX_SUFF(pfnPfHandler))
315 {
316 STAM_PROFILE_START(&pCur->Stat, h);
317
318 if (pCurType->fKeepPgmLock)
319 {
320 rcStrict = pCurType->CTX_SUFF(pfnPfHandler)(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault,
321 pCur->CTX_SUFF(pvUser));
322
323# ifdef VBOX_WITH_STATISTICS
324 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault); /* paranoia in case the handler deregistered itself */
325 if (pCur)
326 STAM_PROFILE_STOP(&pCur->Stat, h);
327# endif
328 }
329 else
330 {
331 void * const pvUser = pCur->CTX_SUFF(pvUser);
332 PGM_UNLOCK(pVM);
333 *pfLockTaken = false;
334
335 rcStrict = pCurType->CTX_SUFF(pfnPfHandler)(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
336
337# ifdef VBOX_WITH_STATISTICS
338 PGM_LOCK_VOID(pVM);
339 pCur = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
340 if (pCur)
341 STAM_PROFILE_STOP(&pCur->Stat, h);
342 PGM_UNLOCK(pVM);
343# endif
344 }
345 }
346 else
347 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
348
349 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
350 return rcStrict;
351 }
352 }
353
354 /*
355 * There is a handled area of the page, but this fault doesn't belong to it.
356 * We must emulate the instruction.
357 *
358 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler
359 * we first check if this was a page-not-present fault for a page with only
360 * write access handlers. Restart the instruction if it wasn't a write access.
361 */
362 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
363
364 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
365 && !(uErr & X86_TRAP_PF_P))
366 {
367# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
368 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
369# else
370 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
371# endif
372 if ( RT_FAILURE(rcStrict)
373 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
374 || !(uErr & X86_TRAP_PF_RW))
375 {
376 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
377 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
378 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
379 return rcStrict;
380 }
381 }
382
383 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06.
384 * It's writing to an unhandled part of the LDT page several million times.
385 */
386 rcStrict = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
387 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
388 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
389 return rcStrict;
390} /* if any kind of handler */
391# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
392
393
394/**
395 * \#PF Handler for raw-mode guest execution.
396 *
397 * @returns VBox status code (appropriate for trap handling and GC return).
398 *
399 * @param pVCpu The cross context virtual CPU structure.
400 * @param uErr The trap error code.
401 * @param pRegFrame Trap register frame.
402 * @param pvFault The fault address.
403 * @param pfLockTaken PGM lock taken here or not (out)
404 */
405PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
406{
407 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
408
409 *pfLockTaken = false;
410
411# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
412 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
413 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
414 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
415 && PGM_SHW_TYPE != PGM_TYPE_NONE
416 int rc;
417
418# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
419 /*
420 * Walk the guest page translation tables and check if it's a guest fault.
421 */
422 PGMPTWALK Walk;
423 GSTPTWALK GstWalk;
424 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &Walk, &GstWalk);
425 if (RT_FAILURE_NP(rc))
426 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
427
428 /* assert some GstWalk sanity. */
429# if PGM_GST_TYPE == PGM_TYPE_AMD64
430 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
431# endif
432# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
433 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
434# endif
435 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
436 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
437 Assert(Walk.fSucceeded);
438 Assert(Walk.fEffective & PGM_PTATTRS_R_MASK);
439
440 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
441 {
442 if ( ( (uErr & X86_TRAP_PF_RW)
443 && !(Walk.fEffective & PGM_PTATTRS_W_MASK)
444 && ( (uErr & X86_TRAP_PF_US)
445 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
446 || ((uErr & X86_TRAP_PF_US) && !(Walk.fEffective & PGM_PTATTRS_US_MASK))
447 || ((uErr & X86_TRAP_PF_ID) && (Walk.fEffective & PGM_PTATTRS_NX_MASK))
448 )
449 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
450 }
451
452 /* Take the big lock now before we update flags. */
453 *pfLockTaken = true;
454 PGM_LOCK_VOID(pVM);
455
456 /*
457 * Set the accessed and dirty flags.
458 */
459 /** @todo Should probably use cmpxchg logic here as we're potentially racing
460 * other CPUs in SMP configs. (the lock isn't enough, since we take it
461 * after walking and the page tables could be stale already) */
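 /* Editor's note: a minimal, illustrative sketch of the cmpxchg approach the todo
  * above mentions. It is not compiled (hence the #if 0), assumes a 64-bit guest
  * PDE (PAE/AMD64 layouts) and only handles the accessed bit; the shipping code
  * below uses GST_ATOMIC_OR instead. */
#if 0
    for (;;)
    {
        uint64_t const uOld = ASMAtomicUoReadU64(&GstWalk.pPde->u);
        if (uOld & X86_PDE_A)
            break; /* already set, nothing to do */
        if (ASMAtomicCmpXchgU64(&GstWalk.pPde->u, uOld | X86_PDE_A, uOld))
            break; /* set the A bit without losing a concurrent guest update */
    }
#endif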
462# if PGM_GST_TYPE == PGM_TYPE_AMD64
463 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
464 {
465 GstWalk.Pml4e.u |= X86_PML4E_A;
466 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
467 }
468 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
469 {
470 GstWalk.Pdpe.u |= X86_PDPE_A;
471 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
472 }
473# endif
474 if (Walk.fBigPage)
475 {
476 Assert(GstWalk.Pde.u & X86_PDE_PS);
477 if (uErr & X86_TRAP_PF_RW)
478 {
479 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
480 {
481 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
482 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
483 }
484 }
485 else
486 {
487 if (!(GstWalk.Pde.u & X86_PDE4M_A))
488 {
489 GstWalk.Pde.u |= X86_PDE4M_A;
490 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
491 }
492 }
493 }
494 else
495 {
496 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
497 if (!(GstWalk.Pde.u & X86_PDE_A))
498 {
499 GstWalk.Pde.u |= X86_PDE_A;
500 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
501 }
502
503 if (uErr & X86_TRAP_PF_RW)
504 {
505# ifdef VBOX_WITH_STATISTICS
506 if (GstWalk.Pte.u & X86_PTE_D)
507 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
508 else
509 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
510# endif
511 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
512 {
513 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
514 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
515 }
516 }
517 else
518 {
519 if (!(GstWalk.Pte.u & X86_PTE_A))
520 {
521 GstWalk.Pte.u |= X86_PTE_A;
522 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
523 }
524 }
525 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
526 }
527#if 0
528 /* Disabling this since it's not reliable for SMP, see @bugref{10092#c22}. */
529 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
530 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
531#endif
532
533# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
534 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
535
536 /* Take the big lock now. */
537 *pfLockTaken = true;
538 PGM_LOCK_VOID(pVM);
539# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
540
541# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
542 /*
543 * If it is a reserved bit fault we know that it is an MMIO (access
544 * handler) related fault and can skip some 200 lines of code.
545 */
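 /* Editor's note: this shortcut works because the shadow PTEs PGM installs for
  * all-access-handler (MMIO) pages are deliberately created with reserved bits
  * set (see SyncHandlerPte() further down, which uses HCPhysInvMmioPg together
  * with EPT_E_MEMTYPE_INVALID_3 resp. X86_PTE_PAE_MBZ_MASK_NO_NX), so a #PF with
  * the RSVD bit set can only originate from one of those entries. */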
546 if (uErr & X86_TRAP_PF_RSVD)
547 {
548 Assert(uErr & X86_TRAP_PF_P);
549 PPGMPAGE pPage;
550# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
551 rc = pgmPhysGetPageEx(pVM, Walk.GCPhys, &pPage);
552 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
553 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
554 pfLockTaken, &Walk, &GstWalk));
555 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
556# else
557 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
558 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
559 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
560 pfLockTaken));
561 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
562# endif
563 AssertRC(rc);
564 PGM_INVL_PG(pVCpu, pvFault);
565 return rc; /* Restart with the corrected entry. */
566 }
567# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
568
569 /*
570 * Fetch the guest PDE, PDPE and PML4E.
571 */
572# if PGM_SHW_TYPE == PGM_TYPE_32BIT
573 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
574 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
575
576# elif PGM_SHW_TYPE == PGM_TYPE_PAE
577 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
578 PX86PDPAE pPDDst;
579# if PGM_GST_TYPE == PGM_TYPE_PAE
580 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
581# else
582 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
583# endif
584 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
585
586# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
587 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
588 PX86PDPAE pPDDst;
589# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
590 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
591 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
592# else
593 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
594# endif
595 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
596
597# elif PGM_SHW_TYPE == PGM_TYPE_EPT
598 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
599 PEPTPD pPDDst;
600 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
601 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
602# endif
603 Assert(pPDDst);
604
605# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
606 /*
607 * Dirty page handling.
608 *
609 * If we successfully correct the write protection fault due to dirty bit
610 * tracking, then return immediately.
611 */
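 /* Editor's note: the dirty-bit tracking referred to here works by mapping guest
  * pages whose D bit is still clear as read-only in the shadow page table and
  * tagging the entry as TRACK_DIRTY (see the SHW_PTE_IS_TRACK_DIRTY and
  * PGM_PDFLAGS_TRACK_DIRTY uses elsewhere in this file). The first guest write
  * then traps here, and CheckDirtyPageFault() can set the guest A/D bits and
  * restore write access without a full page sync. */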
612 if (uErr & X86_TRAP_PF_RW) /* write fault? */
613 {
614 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
615 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
616 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
617 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
618 {
619 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
620 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
621 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
622 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
623 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
624 return VINF_SUCCESS;
625 }
626#ifdef DEBUG_bird
627 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
628 AssertMsg(Walk.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
629#endif
630 }
631
632# if 0 /* rarely useful; leave for debugging. */
633 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
634# endif
635# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
636
637 /*
638 * A common case is the not-present error caused by lazy page table syncing.
639 *
640 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
641 * here so we can safely assume that the shadow PT is present when calling
642 * SyncPage later.
643 *
644 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
645 * of mapping conflict and defer to SyncCR3 in R3.
646 * (Again, we do NOT support access handlers for non-present guest pages.)
647 *
648 */
649# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
650 Assert(GstWalk.Pde.u & X86_PDE_P);
651# endif
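 /* Editor's note: "lazy page table syncing" means the shadow page tables start
  * out empty after a CR3 (re)sync and a shadow PT is only materialised the first
  * time something in the corresponding 2/4 MB region is touched. That first touch
  * lands here with a not-present shadow PDE and is resolved by SyncPT below;
  * later faults in the same region are handled per page by SyncPage. */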
652 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
653 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
654 {
655 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
656# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
657 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
658 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
659# else
660 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
661 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
662# endif
663 if (RT_SUCCESS(rc))
664 return rc;
665 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
666 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
667 return VINF_PGM_SYNC_CR3;
668 }
669
670 /*
671 * Check if this fault address is flagged for special treatment,
672 * which means we'll have to figure out the physical address and
673 * check flags associated with it.
674 *
675 * ASSUME that we can limit any special access handling to pages
676 * in page tables which the guest believes to be present.
677 */
678# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
679 RTGCPHYS GCPhys = Walk.GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
680# else
681 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
682# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
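 /* Editor's note: an illustrative example of the masking above, with made-up
  * addresses and the usual 4 KiB guest pages (GUEST_PAGE_OFFSET_MASK = 0xfff):
  * a fault at pvFault 0x00402a43 whose walk produced Walk.GCPhys 0x76543a43
  * yields GCPhys 0x76543000, i.e. the guest-physical base of the page that any
  * access handlers are registered against. */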
683 PPGMPAGE pPage;
684 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
685 if (RT_FAILURE(rc))
686 {
687 /*
688 * When the guest accesses invalid physical memory (e.g. probing
689 * of RAM or accessing a remapped MMIO range), then we'll fall
690 * back to the recompiler to emulate the instruction.
691 */
692 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
693 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
694 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
695 return VINF_EM_RAW_EMULATE_INSTR;
696 }
697
698 /*
699 * Any handlers for this page?
700 */
701 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
702# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
703 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
704 &Walk, &GstWalk));
705# else
706 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
707# endif
708
709 /*
710 * We are here only if page is present in Guest page tables and
711 * trap is not handled by our handlers.
712 *
713 * Check it for page out-of-sync situation.
714 */
715 if (!(uErr & X86_TRAP_PF_P))
716 {
717 /*
718 * Page is not present in our page tables. Try to sync it!
719 */
720 if (uErr & X86_TRAP_PF_US)
721 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
722 else /* supervisor */
723 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
724
725 if (PGM_PAGE_IS_BALLOONED(pPage))
726 {
727 /* Emulate reads from ballooned pages as they are not present in
728 our shadow page tables. (Required for e.g. Solaris guests; soft
729 ecc, random nr generator.) */
730 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
731 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
732 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
733 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
734 return rc;
735 }
736
737# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
738 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
739# else
740 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
741# endif
742 if (RT_SUCCESS(rc))
743 {
744 /* The page was successfully synced, return to the guest. */
745 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
746 return VINF_SUCCESS;
747 }
748 }
749 else /* uErr & X86_TRAP_PF_P: */
750 {
751 /*
752 * Write protected pages are made writable when the guest makes the
753 * first write to it. This happens for pages that are shared, write
754 * monitored or not yet allocated.
755 *
756 * We may also end up here when CR0.WP=0 in the guest.
757 *
758 * Also, a side effect of not flushing global PDEs are out of sync
759 * pages due to physical monitored regions, that are no longer valid.
760 * Assume for now it only applies to the read/write flag.
761 */
762 if (uErr & X86_TRAP_PF_RW)
763 {
764 /*
765 * Check if it is a read-only page.
766 */
767 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
768 {
769 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
770 Assert(!PGM_PAGE_IS_ZERO(pPage));
771 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
772 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
773
774 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
775 if (rc != VINF_SUCCESS)
776 {
777 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
778 return rc;
779 }
780 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
781 return VINF_EM_NO_MEMORY;
782 }
783
784# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
785 /*
786 * Check to see if we need to emulate the instruction if CR0.WP=0.
787 */
788 if ( !(Walk.fEffective & PGM_PTATTRS_W_MASK)
789 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
790 && CPUMGetGuestCPL(pVCpu) < 3)
791 {
792 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
793
794 /*
795 * The Netware WP0+RO+US hack.
796 *
797 * Netware sometimes(/always?) runs with WP0. It has been observed doing
798 * excessive write accesses to pages which are mapped with US=1 and RW=0
799 * while WP=0. This causes a lot of exits and extremely slow execution.
800 * To avoid trapping and emulating every write here, we change the shadow
801 * page table entry to map it as US=0 and RW=1 until user mode tries to
802 * access it again (see further below). We count these shadow page table
803 * changes so we can avoid having to clear the page pool every time the WP
804 * bit changes to 1 (see PGMCr0WpEnabled()).
805 */
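 /* Editor's note, illustrating the hack with example attribute values: a guest
  * PTE with US=1 RW=0 would normally yield a shadow PTE with US=1 RW=0. While
  * CR0.WP=0 a supervisor write to such a page is legal for the guest, so instead
  * of emulating every write the shadow PTE is flipped to US=0 RW=1. Ring-0 writes
  * then go straight through, and the next ring-3 access faults (US=0) so the
  * change can be undone further down (see the cNetwareWp0Hacks bookkeeping). */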
806# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
807 if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
808 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
809 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
810 {
811 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, Walk.fBigPage));
812 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, Walk.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
813 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
814 {
815 PGM_INVL_PG(pVCpu, pvFault);
816 pVCpu->pgm.s.cNetwareWp0Hacks++;
817 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
818 return rc;
819 }
820 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
821 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
822 }
823# endif
824
825 /* Interpret the access. */
826 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
827 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), Walk.fBigPage, !!(Walk.fEffective & PGM_PTATTRS_US_MASK)));
828 if (RT_SUCCESS(rc))
829 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
830 else
831 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
832 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
833 return rc;
834 }
835# endif
836 /// @todo count the above case; else
837 if (uErr & X86_TRAP_PF_US)
838 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
839 else /* supervisor */
840 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
841
842 /*
843 * Sync the page.
844 *
845 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
846 * page is not present, which is not true in this case.
847 */
848# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
849 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
850# else
851 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
852# endif
853 if (RT_SUCCESS(rc))
854 {
855 /*
856 * Page was successfully synced, return to guest but invalidate
857 * the TLB first as the page is very likely to be in it.
858 */
859# if PGM_SHW_TYPE == PGM_TYPE_EPT
860 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
861# else
862 PGM_INVL_PG(pVCpu, pvFault);
863# endif
864# ifdef VBOX_STRICT
865 PGMPTWALK GstPageWalk;
866 GstPageWalk.GCPhys = RTGCPHYS_MAX;
867 if (!pVM->pgm.s.fNestedPaging)
868 {
869 rc = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
870 AssertMsg(RT_SUCCESS(rc) && ((GstPageWalk.fEffective & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, GstPageWalk.fEffective));
871 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GstPageWalk.GCPhys, GstPageWalk.fEffective));
872 }
873# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
874 uint64_t fPageShw = 0;
875 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
876 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
877 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GstPageWalk.GCPhys, fPageGst, pvFault));
878# endif
879# endif /* VBOX_STRICT */
880 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
881 return VINF_SUCCESS;
882 }
883 }
884# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
885 /*
886 * Check for Netware WP0+RO+US hack from above and undo it when user
887 * mode accesses the page again.
888 */
889 else if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
890 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
891 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
892 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
893 && CPUMGetGuestCPL(pVCpu) == 3
894 && pVM->cCpus == 1
895 )
896 {
897 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
898 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
899 if (RT_SUCCESS(rc))
900 {
901 PGM_INVL_PG(pVCpu, pvFault);
902 pVCpu->pgm.s.cNetwareWp0Hacks--;
903 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
904 return VINF_SUCCESS;
905 }
906 }
907# endif /* PGM_WITH_PAGING */
908
909 /** @todo else: why are we here? */
910
911# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
912 /*
913 * Check for VMM page flags vs. Guest page flags consistency.
914 * Currently only for debug purposes.
915 */
916 if (RT_SUCCESS(rc))
917 {
918 /* Get guest page flags. */
919 PGMPTWALK GstPageWalk;
920 int rc2 = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
921 if (RT_SUCCESS(rc2))
922 {
923 uint64_t fPageShw = 0;
924 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
925
926#if 0
927 /*
928 * Compare page flags.
929 * Note: we have AVL, A, D bits desynced.
930 */
931 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
932 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
933 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
934 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
935 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
936 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
937 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
938 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
939 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
94001:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
94101:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
942
94301:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
94401:01:15.625516 00:08:43.268051 Location :
945e:\vbox\svn\trunk\srcPage flags mismatch!
946pvFault=fffff801b0d7b000
947 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
948GCPhys=0000000019b52000
949fPageShw=0
950fPageGst=77b0000000000121
951rc=0
952#endif
953
954 }
955 else
956 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
957 }
958 else
959 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
960# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
961 }
962
963
964 /*
965 * If we get here it is because something failed above, i.e. most likely guru
966 * meditation time.
967 */
968 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
969 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs.Sel, pRegFrame->rip));
970 return rc;
971
972# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
973 NOREF(uErr); NOREF(pRegFrame); NOREF(pvFault);
974 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
975 return VERR_PGM_NOT_USED_IN_MODE;
976# endif
977}
978
979#endif /* !IN_RING3 */
980
981
982/**
983 * Emulation of the invlpg instruction.
984 *
985 *
986 * @returns VBox status code.
987 *
988 * @param pVCpu The cross context virtual CPU structure.
989 * @param GCPtrPage Page to invalidate.
990 *
991 * @remark ASSUMES that the guest is updating before invalidating. This order
992 * isn't required by the CPU, so this is speculative and could cause
993 * trouble.
994 * @remark No TLB shootdown is done on any other VCPU as we assume that
995 * invlpg emulation is the *only* reason for calling this function.
996 * (The guest has to shoot down TLB entries on other CPUs itself)
997 * Currently true, but keep in mind!
998 *
999 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1000 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1001 */
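/* Editor's note: the typical guest sequence this emulates (per the @remark above)
 * is "update the PTE, then invlpg the linear address", for example:
 *     mov    [rdi], rax      ; write the new PTE value
 *     invlpg [rbx]           ; flush the stale translation for that address
 * The register choice is illustrative only; the point is that the PTE write is
 * assumed to have already happened when this function is called. */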
1002PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
1003{
1004#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1005 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
1006 && PGM_SHW_TYPE != PGM_TYPE_NONE
1007 int rc;
1008 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1009 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1010
1011 PGM_LOCK_ASSERT_OWNER(pVM);
1012
1013 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1014
1015 /*
1016 * Get the shadow PD entry and skip out if this PD isn't present.
1017 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1018 */
1019# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1020 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1021 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1022
1023 /* Fetch the pgm pool shadow descriptor. */
1024 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1025# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1026 if (!pShwPde)
1027 {
1028 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1029 return VINF_SUCCESS;
1030 }
1031# else
1032 Assert(pShwPde);
1033# endif
1034
1035# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1036 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1037 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1038
1039 /* If the shadow PDPE isn't present, then skip the invalidate. */
1040# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1041 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1042# else
1043 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1044# endif
1045 {
1046 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1047 PGM_INVL_PG(pVCpu, GCPtrPage);
1048 return VINF_SUCCESS;
1049 }
1050
1051 /* Fetch the pgm pool shadow descriptor. */
1052 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1053 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1054
1055 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1056 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1057 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1058
1059# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1060 /* PML4 */
1061 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1062 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1063 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1064 PX86PDPAE pPDDst;
1065 PX86PDPT pPdptDst;
1066 PX86PML4E pPml4eDst;
1067 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1068 if (rc != VINF_SUCCESS)
1069 {
1070 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1071 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1072 PGM_INVL_PG(pVCpu, GCPtrPage);
1073 return VINF_SUCCESS;
1074 }
1075 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1076 Assert(pPDDst);
1077 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1078
1079 /* Fetch the pgm pool shadow descriptor. */
1080 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1081 Assert(pShwPde);
1082
1083# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1084
1085 const SHWPDE PdeDst = *pPdeDst;
1086 if (!(PdeDst.u & X86_PDE_P))
1087 {
1088 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1089 PGM_INVL_PG(pVCpu, GCPtrPage);
1090 return VINF_SUCCESS;
1091 }
1092
1093 /*
1094 * Get the guest PD entry and calc big page.
1095 */
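 /* Editor's note: "calc big page" below means determining whether the guest PDE
  * maps a 2/4 MB page (PS bit set and PSE/PAE active) and comparing that with
  * what the shadow PDE was created for (PGM_PDFLAGS_BIG_PAGE), so a transition
  * between 4 KB and big-page mappings can be detected. */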
1096# if PGM_GST_TYPE == PGM_TYPE_32BIT
1097 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1098 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1099 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1100# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1101 unsigned iPDSrc = 0;
1102# if PGM_GST_TYPE == PGM_TYPE_PAE
1103 X86PDPE PdpeSrcIgn;
1104 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1105# else /* AMD64 */
1106 PX86PML4E pPml4eSrcIgn;
1107 X86PDPE PdpeSrcIgn;
1108 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1109# endif
1110 GSTPDE PdeSrc;
1111
1112 if (pPDSrc)
1113 PdeSrc = pPDSrc->a[iPDSrc];
1114 else
1115 PdeSrc.u = 0;
1116# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1117 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1118 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1119 if (fWasBigPage != fIsBigPage)
1120 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1121
1122# ifdef IN_RING3
1123 /*
1124 * If a CR3 Sync is pending we may ignore the invalidate page operation
1125 * depending on the kind of sync and if it's a global page or not.
1126 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1127 */
1128# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1129 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1130 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1131 && fIsBigPage
1132 && (PdeSrc.u & X86_PDE4M_G)
1133 )
1134 )
1135# else
1136 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1137# endif
1138 {
1139 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1140 return VINF_SUCCESS;
1141 }
1142# endif /* IN_RING3 */
1143
1144 /*
1145 * Deal with the Guest PDE.
1146 */
1147 rc = VINF_SUCCESS;
1148 if (PdeSrc.u & X86_PDE_P)
1149 {
1150 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1151 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1152 if (!fIsBigPage)
1153 {
1154 /*
1155 * 4KB - page.
1156 */
1157 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1158 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1159
1160# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1161 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1162 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
1163# endif
1164 if (pShwPage->GCPhys == GCPhys)
1165 {
1166 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1167 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1168
1169 PGSTPT pPTSrc;
1170 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1171 if (RT_SUCCESS(rc))
1172 {
1173 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1174 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1175 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1176 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1177 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1178 GCPtrPage, PteSrc.u & X86_PTE_P,
1179 (PteSrc.u & PdeSrc.u & X86_PTE_RW),
1180 (PteSrc.u & PdeSrc.u & X86_PTE_US),
1181 (uint64_t)PteSrc.u,
1182 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1183 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1184 }
1185 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1186 PGM_INVL_PG(pVCpu, GCPtrPage);
1187 }
1188 else
1189 {
1190 /*
1191 * The page table address changed.
1192 */
1193 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1194 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1195 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1196 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1197 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1198 PGM_INVL_VCPU_TLBS(pVCpu);
1199 }
1200 }
1201 else
1202 {
1203 /*
1204 * 2/4MB - page.
1205 */
1206 /* Before freeing the page, check if anything really changed. */
1207 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1208 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1209# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1210 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1211 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1212# endif
1213 if ( pShwPage->GCPhys == GCPhys
1214 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1215 {
1216 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1217 /** @todo This test is wrong as it cannot check the G bit!
1218 * FIXME */
1219 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1220 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1221 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1222 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1223 {
1224 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1225 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1226 return VINF_SUCCESS;
1227 }
1228 }
1229
1230 /*
1231 * Ok, the page table is present and it's been changed in the guest.
1232 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1233 * We could do this for some flushes in GC too, but we need an algorithm for
1234 * deciding which 4MB pages contain code likely to be executed very soon.
1235 */
1236 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1237 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1238 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1239 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1240 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1241 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1242 }
1243 }
1244 else
1245 {
1246 /*
1247 * Page directory is not present, mark shadow PDE not present.
1248 */
1249 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1250 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1251 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1252 PGM_INVL_PG(pVCpu, GCPtrPage);
1253 }
1254 return rc;
1255
1256#else /* guest real and protected mode, nested + ept, none. */
1257 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1258 NOREF(pVCpu); NOREF(GCPtrPage);
1259 return VINF_SUCCESS;
1260#endif
1261}
1262
1263#if PGM_SHW_TYPE != PGM_TYPE_NONE
1264
1265/**
1266 * Update the tracking of shadowed pages.
1267 *
1268 * @param pVCpu The cross context virtual CPU structure.
1269 * @param pShwPage The shadow page.
1270 * @param HCPhys The physical page that is being dereferenced.
1271 * @param iPte Shadow PTE index
1272 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1273 */
1274DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1275 RTGCPHYS GCPhysPage)
1276{
1277 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1278
1279# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1280 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1281 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1282
1283 /* Use the hint we retrieved from the cached guest PT. */
1284 if (pShwPage->fDirty)
1285 {
1286 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1287
1288 Assert(pShwPage->cPresent);
1289 Assert(pPool->cPresent);
1290 pShwPage->cPresent--;
1291 pPool->cPresent--;
1292
1293 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1294 AssertRelease(pPhysPage);
1295 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1296 return;
1297 }
1298# else
1299 NOREF(GCPhysPage);
1300# endif
1301
1302 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1303 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1304
1305 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1306 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1307 * 2. write protect all shadowed pages. I.e. implement caching.
1308 */
1309 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1310
1311 /*
1312 * Find the guest address.
1313 */
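 /* Editor's note: without the dirty-page hint taken above, the only way to get
  * from HCPhys back to the owning PGMPAGE is this linear scan over every page of
  * every RAM range, which is why the @todo above suggests an HCPhys -> GCPhys TLB
  * or write-protecting all shadowed pages. */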
1314 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1315 pRam;
1316 pRam = pRam->CTX_SUFF(pNext))
1317 {
1318 unsigned iPage = pRam->cb >> GUEST_PAGE_SHIFT;
1319 while (iPage-- > 0)
1320 {
1321 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1322 {
1323 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1324
1325 Assert(pShwPage->cPresent);
1326 Assert(pPool->cPresent);
1327 pShwPage->cPresent--;
1328 pPool->cPresent--;
1329
1330 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1331 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1332 return;
1333 }
1334 }
1335 }
1336
1337 for (;;)
1338 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1339}
1340
1341
1342/**
1343 * Update the tracking of shadowed pages.
1344 *
1345 * @param pVCpu The cross context virtual CPU structure.
1346 * @param pShwPage The shadow page.
1347 * @param u16 The top 16-bit of the pPage->HCPhys.
1348 * @param pPage Pointer to the guest page. this will be modified.
1349 * @param iPTDst The index into the shadow table.
1350 */
1351DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16,
1352 PPGMPAGE pPage, const unsigned iPTDst)
1353{
1354 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1355
1356 /*
1357 * Just deal with the simple first time here.
1358 */
1359 if (!u16)
1360 {
1361 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1362 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1363 /* Save the page table index. */
1364 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1365 }
1366 else
1367 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1368
1369 /* write back */
1370 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1371 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1372
1373 /* update statistics. */
1374 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1375 pShwPage->cPresent++;
1376 if (pShwPage->iFirstPresent > iPTDst)
1377 pShwPage->iFirstPresent = iPTDst;
1378}
1379
1380
1381/**
1382 * Modifies a shadow PTE to account for access handlers.
1383 *
1384 * @param pVM The cross context VM structure.
1385 * @param pPage The page in question.
1386 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1387 * A (accessed) bit so it can be emulated correctly.
1388 * @param pPteDst The shadow PTE (output). This is temporary storage and
1389 * does not need to be set atomically.
1390 */
1391DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1392{
1393 NOREF(pVM); RT_NOREF_PV(fPteSrc);
1394
1395 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1396 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1397 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1398 {
1399 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1400# if PGM_SHW_TYPE == PGM_TYPE_EPT
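        /* Note: EPT_E_WRITE is deliberately left out so guest writes to the monitored
           page still cause EPT violations and reach the access handler. */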
1401 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1402# else
1403 if (fPteSrc & X86_PTE_A)
1404 {
1405 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1406 SHW_PTE_SET_RO(*pPteDst);
1407 }
1408 else
1409 SHW_PTE_SET(*pPteDst, 0);
1410# endif
1411 }
1412# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1413# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1414 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1415 && ( BTH_IS_NP_ACTIVE(pVM)
1416 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1417# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1418 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1419# endif
1420 )
1421 {
1422 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1423# if PGM_SHW_TYPE == PGM_TYPE_EPT
1424 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1425 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1426 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1427 | EPT_E_WRITE
1428 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1429 | EPT_E_MEMTYPE_INVALID_3;
1430# else
1431        /* Set high page frame bits that MBZ (reserved bits on PAE, CPU dependent on AMD64). */
1432 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1433# endif
1434 }
1435# endif
1436# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1437 else
1438 {
1439 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1440 SHW_PTE_SET(*pPteDst, 0);
1441 }
1442 /** @todo count these kinds of entries. */
1443}
1444
1445
1446/**
1447 * Creates a 4K shadow page for a guest page.
1448 *
1449 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1450 * physical address. Of the PdeSrc argument only the flags are used. No page
1451 * structures will be mapped in this function.
1452 *
1453 * @param pVCpu The cross context virtual CPU structure.
1454 * @param pPteDst Destination page table entry.
1455 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1456 * Can safely assume that only the flags are being used.
1457 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1458 * @param pShwPage Pointer to the shadow page.
1459 * @param iPTDst The index into the shadow table.
1460 *
1461 * @remark Not used for 2/4MB pages!
1462 */
1463# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1464static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1465 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1466# else
1467static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1468 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1469# endif
1470{
1471 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1472 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1473
1474# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1475 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1476 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1477
1478 if (pShwPage->fDirty)
1479 {
1480 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1481 PGSTPT pGstPT;
1482
1483 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1484 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1485 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1486 pGstPT->a[iPTDst].u = PteSrc.u;
1487 }
1488# else
1489 Assert(!pShwPage->fDirty);
1490# endif
1491
1492# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1493 if ( (PteSrc.u & X86_PTE_P)
1494 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1495# endif
1496 {
1497# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1498 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1499# endif
1500 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1501
1502 /*
1503 * Find the ram range.
1504 */
1505 PPGMPAGE pPage;
1506 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1507 if (RT_SUCCESS(rc))
1508 {
1509 /* Ignore ballooned pages.
1510 Don't return errors or use a fatal assert here as part of a
1511               shadow sync range might include ballooned pages. */
1512 if (PGM_PAGE_IS_BALLOONED(pPage))
1513 {
1514 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1515 return;
1516 }
1517
1518# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1519 /* Make the page writable if necessary. */
1520 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1521 && ( PGM_PAGE_IS_ZERO(pPage)
1522# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1523 || ( (PteSrc.u & X86_PTE_RW)
1524# else
1525 || ( 1
1526# endif
1527 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1528# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1529 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1530# endif
1531# ifdef VBOX_WITH_PAGE_SHARING
1532 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1533# endif
1534 )
1535 )
1536 )
1537 {
1538 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1539 AssertRC(rc);
1540 }
1541# endif
1542
1543 /*
1544 * Make page table entry.
1545 */
1546 SHWPTE PteDst;
1547# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1548 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1549# else
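            /* Without guest paging there are no guest A/D bits to emulate, so the entry is
               simply treated as present, writable, user and already accessed/dirty. */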
1550 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1551# endif
1552 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1553 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1554 else
1555 {
1556# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1557 /*
1558 * If the page or page directory entry is not marked accessed,
1559 * we mark the page not present.
1560 */
1561 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1562 {
1563 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1564 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1565 SHW_PTE_SET(PteDst, 0);
1566 }
1567 /*
1568 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1569 * when the page is modified.
1570 */
1571 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
1572 {
1573 AssertCompile(X86_PTE_RW == X86_PDE_RW);
1574 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
1575 SHW_PTE_SET(PteDst,
1576 fGstShwPteFlags
1577 | PGM_PAGE_GET_HCPHYS(pPage)
1578 | PGM_PTFLAGS_TRACK_DIRTY);
1579 SHW_PTE_SET_RO(PteDst);
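                /* The entry is now read-only and tagged PGM_PTFLAGS_TRACK_DIRTY so that
                   CheckDirtyPageFault can tell dirty-bit emulation apart from a genuinely
                   write-protected page when the write faults. */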
1580 }
1581 else
1582# endif
1583 {
1584 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
1585# if PGM_SHW_TYPE == PGM_TYPE_EPT
1586 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
1587 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1588# else
1589 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1590# endif
1591 }
1592
1593 /*
1594 * Make sure only allocated pages are mapped writable.
1595 */
1596 if ( SHW_PTE_IS_P_RW(PteDst)
1597 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1598 {
1599 /* Still applies to shared pages. */
1600 Assert(!PGM_PAGE_IS_ZERO(pPage));
1601 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why, isn't it? */
1602 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1603 }
1604 }
1605
1606 /*
1607 * Keep user track up to date.
1608 */
1609 if (SHW_PTE_IS_P(PteDst))
1610 {
1611 if (!SHW_PTE_IS_P(*pPteDst))
1612 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1613 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1614 {
1615 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1616 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1617 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1618 }
1619 }
1620 else if (SHW_PTE_IS_P(*pPteDst))
1621 {
1622 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1623 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1624 }
1625
1626 /*
1627 * Update statistics and commit the entry.
1628 */
1629# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1630 if (!(PteSrc.u & X86_PTE_G))
1631 pShwPage->fSeenNonGlobal = true;
1632# endif
1633 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1634 return;
1635 }
1636
1637/** @todo count these three different kinds. */
1638 Log2(("SyncPageWorker: invalid address in Pte\n"));
1639 }
1640# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1641 else if (!(PteSrc.u & X86_PTE_P))
1642 Log2(("SyncPageWorker: page not present in Pte\n"));
1643 else
1644 Log2(("SyncPageWorker: invalid Pte\n"));
1645# endif
1646
1647 /*
1648 * The page is not present or the PTE is bad. Replace the shadow PTE by
1649 * an empty entry, making sure to keep the user tracking up to date.
1650 */
1651 if (SHW_PTE_IS_P(*pPteDst))
1652 {
1653 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1654 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1655 }
1656 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1657}
1658
1659
1660/**
1661 * Syncs a guest OS page.
1662 *
1663 * There are no conflicts at this point, neither is there any need for
1664 * page table allocations.
1665 *
1666 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1667 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1668 *
1669 * @returns VBox status code.
1670 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1671 * @param pVCpu The cross context virtual CPU structure.
1672 * @param PdeSrc Page directory entry of the guest.
1673 * @param GCPtrPage Guest context page address.
1674 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1675 * @param uErr Fault error (X86_TRAP_PF_*).
1676 */
1677static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1678{
1679 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1680 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
1681 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1682 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
1683
1684 PGM_LOCK_ASSERT_OWNER(pVM);
1685
1686# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1687 || PGM_GST_TYPE == PGM_TYPE_PAE \
1688 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1689 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
1690
1691 /*
1692 * Assert preconditions.
1693 */
1694 Assert(PdeSrc.u & X86_PDE_P);
1695 Assert(cPages);
1696# if 0 /* rarely useful; leave for debugging. */
1697 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1698# endif
1699
1700 /*
1701 * Get the shadow PDE, find the shadow page table in the pool.
1702 */
1703# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1704 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1705 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1706
1707 /* Fetch the pgm pool shadow descriptor. */
1708 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1709 Assert(pShwPde);
1710
1711# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1712 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1713 PPGMPOOLPAGE pShwPde = NULL;
1714 PX86PDPAE pPDDst;
1715
1716 /* Fetch the pgm pool shadow descriptor. */
1717 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1718 AssertRCSuccessReturn(rc2, rc2);
1719 Assert(pShwPde);
1720
1721 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1722 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1723
1724# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1725 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1726 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1727 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1728 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1729
1730 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1731 AssertRCSuccessReturn(rc2, rc2);
1732 Assert(pPDDst && pPdptDst);
1733 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1734# endif
1735 SHWPDE PdeDst = *pPdeDst;
1736
1737 /*
1738 * - In the guest SMP case we could have blocked while another VCPU reused
1739 * this page table.
1740 * - With W7-64 we may also take this path when the A bit is cleared on
1741 * higher level tables (PDPE/PML4E). The guest does not invalidate the
1742 * relevant TLB entries. If we're write monitoring any page mapped by
1743 * the modified entry, we may end up here with a "stale" TLB entry.
1744 */
1745 if (!(PdeDst.u & X86_PDE_P))
1746 {
1747 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1748 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
1749 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
1750 if (uErr & X86_TRAP_PF_P)
1751 PGM_INVL_PG(pVCpu, GCPtrPage);
1752 return VINF_SUCCESS; /* force the instruction to be executed again. */
1753 }
1754
1755 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1756 Assert(pShwPage);
1757
1758# if PGM_GST_TYPE == PGM_TYPE_AMD64
1759 /* Fetch the pgm pool shadow descriptor. */
1760 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1761 Assert(pShwPde);
1762# endif
1763
1764 /*
1765 * Check that the page is present and that the shadow PDE isn't out of sync.
1766 */
1767 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1768 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1769 RTGCPHYS GCPhys;
1770 if (!fBigPage)
1771 {
1772 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1773# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1774 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
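        /* (A 32-bit guest PT has 1024 4-byte entries while a PAE shadow PT holds only 512
            8-byte ones, so the odd shadow PDE starts half way (2 KB) into the guest PT.) */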
1775 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
1776# endif
1777 }
1778 else
1779 {
1780 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1781# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1782 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1783 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1784# endif
1785 }
1786 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
1787 if ( fPdeValid
1788 && pShwPage->GCPhys == GCPhys
1789 && (PdeSrc.u & X86_PDE_P)
1790 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1791 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
1792# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1793 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
1794# endif
1795 )
1796 {
1797 /*
1798 * Check that the PDE is marked accessed already.
1799 * Since we set the accessed bit *before* getting here on a #PF, this
1800 * check is only meant for dealing with non-#PF'ing paths.
1801 */
1802 if (PdeSrc.u & X86_PDE_A)
1803 {
1804 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1805 if (!fBigPage)
1806 {
1807 /*
1808 * 4KB Page - Map the guest page table.
1809 */
1810 PGSTPT pPTSrc;
1811 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1812 if (RT_SUCCESS(rc))
1813 {
1814# ifdef PGM_SYNC_N_PAGES
1815 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1816 if ( cPages > 1
1817 && !(uErr & X86_TRAP_PF_P)
1818 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
1819 {
1820 /*
1821 * This code path is currently only taken when the caller is PGMTrap0eHandler
1822 * for non-present pages!
1823 *
1824 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1825 * deal with locality.
1826 */
1827 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1828# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1829 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1830 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1831# else
1832 const unsigned offPTSrc = 0;
1833# endif
1834 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1835 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1836 iPTDst = 0;
1837 else
1838 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1839
1840 for (; iPTDst < iPTDstEnd; iPTDst++)
1841 {
1842 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
1843
1844 if ( (pPteSrc->u & X86_PTE_P)
1845 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1846 {
1847 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT))
1848 | ((offPTSrc + iPTDst) << GUEST_PAGE_SHIFT);
1849 NOREF(GCPtrCurPage);
1850 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
1851 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1852 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
1853 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
1854 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
1855 (uint64_t)pPteSrc->u,
1856 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1857 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1858 }
1859 }
1860 }
1861 else
1862# endif /* PGM_SYNC_N_PAGES */
1863 {
1864 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1865 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1866 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1867 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1868 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1869 GCPtrPage, PteSrc.u & X86_PTE_P,
1870 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
1871 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
1872 (uint64_t)PteSrc.u,
1873 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1874 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1875 }
1876 }
1877 else /* MMIO or invalid page: emulated in #PF handler. */
1878 {
1879 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1880 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
1881 }
1882 }
1883 else
1884 {
1885 /*
1886 * 4/2MB page - lazy syncing shadow 4K pages.
1887 * (There are many causes of getting here, it's no longer only CSAM.)
1888 */
1889 /* Calculate the GC physical address of this 4KB shadow page. */
1890 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
1891 /* Find ram range. */
1892 PPGMPAGE pPage;
1893 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
1894 if (RT_SUCCESS(rc))
1895 {
1896 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1897
1898# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1899 /* Try to make the page writable if necessary. */
1900 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1901 && ( PGM_PAGE_IS_ZERO(pPage)
1902 || ( (PdeSrc.u & X86_PDE_RW)
1903 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1904# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1905 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1906# endif
1907# ifdef VBOX_WITH_PAGE_SHARING
1908 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1909# endif
1910 )
1911 )
1912 )
1913 {
1914 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1915 AssertRC(rc);
1916 }
1917# endif
1918
1919 /*
1920 * Make shadow PTE entry.
1921 */
1922 SHWPTE PteDst;
1923 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1924 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
1925 else
1926 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
1927
1928 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1929 if ( SHW_PTE_IS_P(PteDst)
1930 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
1931 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1932
1933 /* Make sure only allocated pages are mapped writable. */
1934 if ( SHW_PTE_IS_P_RW(PteDst)
1935 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1936 {
1937 /* Still applies to shared pages. */
1938 Assert(!PGM_PAGE_IS_ZERO(pPage));
1939 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
1940 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1941 }
1942
1943 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
1944
1945 /*
1946 * If the page is not flagged as dirty and is writable, then make it read-only
1947 * at PD level, so we can set the dirty bit when the page is modified.
1948 *
1949 * ASSUMES that page access handlers are implemented on page table entry level.
1950 * Thus we will first catch the dirty access and set PDE.D and restart. If
1951 * there is an access handler, we'll trap again and let it work on the problem.
1952 */
1953 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1954 * As for invlpg, it simply frees the whole shadow PT.
1955 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1956 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
1957 {
1958 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
1959 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1960 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
1961 }
1962 else
1963 {
1964 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
1965 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
1966 }
1967 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
1968 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1969 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
1970 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1971 }
1972 else
1973 {
1974 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1975 /** @todo must wipe the shadow page table entry in this
1976 * case. */
1977 }
1978 }
1979 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
1980 return VINF_SUCCESS;
1981 }
1982
1983 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
1984 }
1985 else if (fPdeValid)
1986 {
1987 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1988 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1989 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1990 }
1991 else
1992 {
1993/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
1994 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1995 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1996 }
1997
1998 /*
1999 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2000 * Yea, I'm lazy.
2001 */
2002 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2003 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
2004
2005 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2006 PGM_INVL_VCPU_TLBS(pVCpu);
2007 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2008
2009
2010# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2011 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2012 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2013 NOREF(PdeSrc);
2014
2015# ifdef PGM_SYNC_N_PAGES
2016 /*
2017 * Get the shadow PDE, find the shadow page table in the pool.
2018 */
2019# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2020 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2021
2022# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2023 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2024
2025# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2026 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2027 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2028 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2029 X86PDEPAE PdeDst;
2030 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2031
2032 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2033 AssertRCSuccessReturn(rc, rc);
2034 Assert(pPDDst && pPdptDst);
2035 PdeDst = pPDDst->a[iPDDst];
2036
2037# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2038 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2039 PEPTPD pPDDst;
2040 EPTPDE PdeDst;
2041
2042 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2043 if (rc != VINF_SUCCESS)
2044 {
2045 AssertRC(rc);
2046 return rc;
2047 }
2048 Assert(pPDDst);
2049 PdeDst = pPDDst->a[iPDDst];
2050# endif
2051 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2052 if (!SHW_PDE_IS_P(PdeDst))
2053 {
2054 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2055 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2056 return VINF_SUCCESS; /* force the instruction to be executed again. */
2057 }
2058
2059 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2060 if (SHW_PDE_IS_BIG(PdeDst))
2061 {
2062 Assert(pVM->pgm.s.fNestedPaging);
2063 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2064 return VINF_SUCCESS;
2065 }
2066
2067 /* Mask away the page offset. */
2068 GCPtrPage &= ~((RTGCPTR)0xfff);
2069
2070 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2071 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2072
2073 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2074 if ( cPages > 1
2075 && !(uErr & X86_TRAP_PF_P)
2076 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2077 {
2078 /*
2079 * This code path is currently only taken when the caller is PGMTrap0eHandler
2080 * for non-present pages!
2081 *
2082 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2083 * deal with locality.
2084 */
2085 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2086 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2087 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2088 iPTDst = 0;
2089 else
2090 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2091 for (; iPTDst < iPTDstEnd; iPTDst++)
2092 {
2093 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2094 {
2095 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2096 | (iPTDst << GUEST_PAGE_SHIFT));
2097
2098 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2099 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2100 GCPtrCurPage,
2101 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2102 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2103
2104 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2105 break;
2106 }
2107 else
2108 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n",
2109 (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << GUEST_PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2110 }
2111 }
2112 else
2113# endif /* PGM_SYNC_N_PAGES */
2114 {
2115 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2116 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2117 | (iPTDst << GUEST_PAGE_SHIFT));
2118
2119 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2120
2121 Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1}PteDst=%08llx%s\n",
2122 GCPtrPage,
2123 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2124 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2125 }
2126 return VINF_SUCCESS;
2127
2128# else
2129 NOREF(PdeSrc);
2130 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2131 return VERR_PGM_NOT_USED_IN_MODE;
2132# endif
2133}
2134
2135#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2136#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
2137
2138/**
2139 * CheckPageFault helper for returning a page fault indicating a non-present
2140 * (NP) entry in the page translation structures.
2141 *
2142 * @returns VINF_EM_RAW_GUEST_TRAP.
2143 * @param pVCpu The cross context virtual CPU structure.
2144 * @param uErr The error code of the shadow fault. Corrections to
2145 * TRPM's copy will be made if necessary.
2146 * @param GCPtrPage For logging.
2147 * @param uPageFaultLevel For logging.
2148 */
2149DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2150{
2151 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2152 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2153 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2154 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2155 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2156
2157 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2158 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2159 return VINF_EM_RAW_GUEST_TRAP;
2160}
2161
2162
2163/**
2164 * CheckPageFault helper for returning a page fault indicating a reserved bit
2165 * (RSVD) error in the page translation structures.
2166 *
2167 * @returns VINF_EM_RAW_GUEST_TRAP.
2168 * @param pVCpu The cross context virtual CPU structure.
2169 * @param uErr The error code of the shadow fault. Corrections to
2170 * TRPM's copy will be made if necessary.
2171 * @param GCPtrPage For logging.
2172 * @param uPageFaultLevel For logging.
2173 */
2174DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2175{
2176 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2177 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2178 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2179
2180 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2181 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2182 return VINF_EM_RAW_GUEST_TRAP;
2183}
2184
2185
2186/**
2187 * CheckPageFault helper for returning a page protection fault (P).
2188 *
2189 * @returns VINF_EM_RAW_GUEST_TRAP.
2190 * @param pVCpu The cross context virtual CPU structure.
2191 * @param uErr The error code of the shadow fault. Corrections to
2192 * TRPM's copy will be made if necessary.
2193 * @param GCPtrPage For logging.
2194 * @param uPageFaultLevel For logging.
2195 */
2196DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPUCC pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2197{
2198 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyTrackRealPF));
2199 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2200 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2201 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2202
2203 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2204 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(uPageFaultLevel);
2205 return VINF_EM_RAW_GUEST_TRAP;
2206}
2207
2208
2209/**
2210 * Handle dirty bit tracking faults.
2211 *
2212 * @returns VBox status code.
2213 * @param pVCpu The cross context virtual CPU structure.
2214 * @param uErr Page fault error code.
2215 * @param pPdeSrc Guest page directory entry.
2216 * @param pPdeDst Shadow page directory entry.
2217 * @param GCPtrPage Guest context page address.
2218 */
2219static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
2220 RTGCPTR GCPtrPage)
2221{
2222 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2223 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2224 NOREF(uErr);
2225
2226 PGM_LOCK_ASSERT_OWNER(pVM);
2227
2228 /*
2229 * Handle big page.
2230 */
2231 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
2232 {
2233 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2234 {
2235 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2236 Assert(pPdeSrc->u & X86_PDE_RW);
2237
2238 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2239 * fault again and take this path to only invalidate the entry (see below). */
2240 SHWPDE PdeDst = *pPdeDst;
2241 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
2242 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
2243 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2244 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2245 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2246 }
2247
2248# ifdef IN_RING0
2249 /* Check for stale TLB entry; only applies to the SMP guest case. */
2250 if ( pVM->cCpus > 1
2251 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
2252 {
2253 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2254 if (pShwPage)
2255 {
2256 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2257 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2258 if (SHW_PTE_IS_P_RW(*pPteDst))
2259 {
2260 /* Stale TLB entry. */
2261 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2262 PGM_INVL_PG(pVCpu, GCPtrPage);
2263 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2264 }
2265 }
2266 }
2267# endif /* IN_RING0 */
2268 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2269 }
2270
2271 /*
2272 * Map the guest page table.
2273 */
2274 PGSTPT pPTSrc;
2275 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2276 AssertRCReturn(rc, rc);
2277
2278 if (SHW_PDE_IS_P(*pPdeDst))
2279 {
2280 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2281 const GSTPTE PteSrc = *pPteSrc;
2282
2283 /*
2284 * Map shadow page table.
2285 */
2286 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2287 if (pShwPage)
2288 {
2289 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2290 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2291 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2292 {
2293 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2294 {
2295 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
2296 SHWPTE PteDst = *pPteDst;
2297
2298 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2299 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2300
2301 Assert(PteSrc.u & X86_PTE_RW);
2302
2303 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2304 * entry will not harm; write access will simply fault again and
2305 * take this path to only invalidate the entry.
2306 */
2307 if (RT_LIKELY(pPage))
2308 {
2309 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2310 {
2311 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2312 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2313 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2314 SHW_PTE_SET_RO(PteDst);
2315 }
2316 else
2317 {
2318 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2319 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2320 {
2321 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
2322 AssertRC(rc);
2323 }
2324 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2325 SHW_PTE_SET_RW(PteDst);
2326 else
2327 {
2328 /* Still applies to shared pages. */
2329 Assert(!PGM_PAGE_IS_ZERO(pPage));
2330 SHW_PTE_SET_RO(PteDst);
2331 }
2332 }
2333 }
2334 else
2335 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2336
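                    /* Mark the shadow entry accessed and dirty and drop the tracking flag;
                       the restarted write should then complete without taking this path again. */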
2337 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2338 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2339 PGM_INVL_PG(pVCpu, GCPtrPage);
2340 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2341 }
2342
2343# ifdef IN_RING0
2344 /* Check for stale TLB entry; only applies to the SMP guest case. */
2345 if ( pVM->cCpus > 1
2346 && SHW_PTE_IS_RW(*pPteDst)
2347 && SHW_PTE_IS_A(*pPteDst))
2348 {
2349 /* Stale TLB entry. */
2350 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2351 PGM_INVL_PG(pVCpu, GCPtrPage);
2352 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2353 }
2354# endif
2355 }
2356 }
2357 else
2358 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2359 }
2360
2361 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2362}
2363
2364#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
2365
2366/**
2367 * Sync a shadow page table.
2368 *
2369 * The shadow page table is not present in the shadow PDE.
2370 *
2371 * Handles mapping conflicts.
2372 *
2373 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2374 * conflict), and Trap0eHandler.
2375 *
2376 * A precondition for this method is that the shadow PDE is not present. The
2377 * caller must take the PGM lock before checking this and continue to hold it
2378 * when calling this method.
2379 *
2380 * @returns VBox status code.
2381 * @param pVCpu The cross context virtual CPU structure.
2382 * @param iPDSrc Page directory index.
2383 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2384 * Assume this is a temporary mapping.
2385 * @param GCPtrPage   GC pointer of the page that caused the fault.
2386 */
2387static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2388{
2389 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2390 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2391
2392#if 0 /* rarely useful; leave for debugging. */
2393 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2394#endif
2395 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2396
2397 PGM_LOCK_ASSERT_OWNER(pVM);
2398
2399#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2400 || PGM_GST_TYPE == PGM_TYPE_PAE \
2401 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2402 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
2403 && PGM_SHW_TYPE != PGM_TYPE_NONE
2404 int rc = VINF_SUCCESS;
2405
2406 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2407
2408 /*
2409 * Some input validation first.
2410 */
2411 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2412
2413 /*
2414 * Get the relevant shadow PDE entry.
2415 */
2416# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2417 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2418 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2419
2420 /* Fetch the pgm pool shadow descriptor. */
2421 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2422 Assert(pShwPde);
2423
2424# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2425 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2426 PPGMPOOLPAGE pShwPde = NULL;
2427 PX86PDPAE pPDDst;
2428 PSHWPDE pPdeDst;
2429
2430 /* Fetch the pgm pool shadow descriptor. */
2431 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2432 AssertRCSuccessReturn(rc, rc);
2433 Assert(pShwPde);
2434
2435 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2436 pPdeDst = &pPDDst->a[iPDDst];
2437
2438# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2439 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2440 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2441 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2442 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2443 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2444 AssertRCSuccessReturn(rc, rc);
2445 Assert(pPDDst);
2446 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2447
2448# endif
2449 SHWPDE PdeDst = *pPdeDst;
2450
2451# if PGM_GST_TYPE == PGM_TYPE_AMD64
2452 /* Fetch the pgm pool shadow descriptor. */
2453 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2454 Assert(pShwPde);
2455# endif
2456
2457 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P.*/
2458
2459 /*
2460 * Sync the page directory entry.
2461 */
2462 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2463 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
2464 if ( (PdeSrc.u & X86_PDE_P)
2465 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2466 {
2467 /*
2468 * Allocate & map the page table.
2469 */
2470 PSHWPT pPTDst;
2471 PPGMPOOLPAGE pShwPage;
2472 RTGCPHYS GCPhys;
2473 if (fPageTable)
2474 {
2475 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2476# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2477 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2478 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2479# endif
2480 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
2481 pShwPde->idx, iPDDst, false /*fLockPage*/,
2482 &pShwPage);
2483 }
2484 else
2485 {
2486 PGMPOOLACCESS enmAccess;
2487# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2488 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
2489# else
2490 const bool fNoExecute = false;
2491# endif
2492
2493 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2494# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2495 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2496 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2497# endif
2498 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
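            /* (The pool cache keys shadow PTs on GCPhys; for big pages the access bits are
                baked into the shadow PTEs, so the access kind presumably must be part of the
                lookup to avoid reusing a cached PT built with different attributes.) */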
2499 if (PdeSrc.u & X86_PDE_US)
2500 {
2501 if (PdeSrc.u & X86_PDE_RW)
2502 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2503 else
2504 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2505 }
2506 else
2507 {
2508 if (PdeSrc.u & X86_PDE_RW)
2509 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2510 else
2511 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2512 }
2513 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
2514 pShwPde->idx, iPDDst, false /*fLockPage*/,
2515 &pShwPage);
2516 }
2517 if (rc == VINF_SUCCESS)
2518 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2519 else if (rc == VINF_PGM_CACHED_PAGE)
2520 {
2521 /*
2522 * The PT was cached, just hook it up.
2523 */
2524 if (fPageTable)
2525 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2526 else
2527 {
2528 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2529 /* (see explanation and assumptions further down.) */
2530 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2531 {
2532 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2533 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2534 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2535 }
2536 }
2537 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2538 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2539 return VINF_SUCCESS;
2540 }
2541 else
2542 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2543 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2544 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2545 * irrelevant at this point. */
2546 PdeDst.u &= X86_PDE_AVL_MASK;
2547 PdeDst.u |= pShwPage->Core.Key;
2548
2549 /*
2550 * Page directory has been accessed (this is a fault situation, remember).
2551 */
2552 /** @todo
2553     * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2554 * fault situation. What's more, the Trap0eHandler has already set the
2555 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2556 * might need setting the accessed flag.
2557 *
2558 * The best idea is to leave this change to the caller and add an
2559 * assertion that it's set already. */
2560 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
2561 if (fPageTable)
2562 {
2563 /*
2564 * Page table - 4KB.
2565 *
2566 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2567 */
2568 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2569 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
2570 PGSTPT pPTSrc;
2571 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2572 if (RT_SUCCESS(rc))
2573 {
2574 /*
2575 * Start by syncing the page directory entry so CSAM's TLB trick works.
2576 */
2577 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2578 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2579 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2580 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2581
2582 /*
2583 * Directory/page user or supervisor privilege: (same goes for read/write)
2584 *
2585 * Directory Page Combined
2586 * U/S U/S U/S
2587 * 0 0 0
2588 * 0 1 0
2589 * 1 0 0
2590 * 1 1 1
2591 *
2592 * Simple AND operation. Table listed for completeness.
2593 *
2594 */
2595 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
2596# ifdef PGM_SYNC_N_PAGES
2597 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2598 unsigned iPTDst = iPTBase;
2599 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2600 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2601 iPTDst = 0;
2602 else
2603 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2604# else /* !PGM_SYNC_N_PAGES */
2605 unsigned iPTDst = 0;
2606 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2607# endif /* !PGM_SYNC_N_PAGES */
2608 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2609 | ((RTGCPTR)iPTDst << GUEST_PAGE_SHIFT);
2610# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2611 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2612 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2613# else
2614 const unsigned offPTSrc = 0;
2615# endif
2616 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += GUEST_PAGE_SIZE)
2617 {
2618 const unsigned iPTSrc = iPTDst + offPTSrc;
2619 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2620 if (PteSrc.u & X86_PTE_P)
2621 {
2622 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2623 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2624 GCPtrCur,
2625 PteSrc.u & X86_PTE_P,
2626 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2627 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2628 (uint64_t)PteSrc.u,
2629 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2630 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2631 }
2632 /* else: the page table was cleared by the pool */
2633 } /* for PTEs */
2634 }
2635 }
2636 else
2637 {
2638 /*
2639 * Big page - 2/4MB.
2640 *
2641 * We'll walk the ram range list in parallel and optimize lookups.
2642 * We will only sync one shadow page table at a time.
2643 */
2644 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
2645
2646 /**
2647 * @todo It might be more efficient to sync only a part of the 4MB
2648 * page (similar to what we do for 4KB PDs).
2649 */
2650
2651 /*
2652 * Start by syncing the page directory entry.
2653 */
2654 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2655 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2656
2657 /*
2658 * If the page is not flagged as dirty and is writable, then make it read-only
2659 * at PD level, so we can set the dirty bit when the page is modified.
2660 *
2661 * ASSUMES that page access handlers are implemented on page table entry level.
2662 * Thus we will first catch the dirty access and set PDE.D and restart. If
2663 * there is an access handler, we'll trap again and let it work on the problem.
2664 */
2665 /** @todo move the above stuff to a section in the PGM documentation. */
2666 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2667 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2668 {
2669 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2670 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2671 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2672 }
2673 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2674 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2675
2676 /*
2677 * Fill the shadow page table.
2678 */
2679 /* Get address and flags from the source PDE. */
2680 SHWPTE PteDstBase;
2681 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2682
2683 /* Loop thru the entries in the shadow PT. */
2684 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2685 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2686 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
2687 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2688 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2689 unsigned iPTDst = 0;
2690 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2691 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2692 {
2693 if (pRam && GCPhys >= pRam->GCPhys)
2694 {
2695# ifndef PGM_WITH_A20
2696 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT;
2697# endif
2698 do
2699 {
2700 /* Make shadow PTE. */
2701# ifdef PGM_WITH_A20
2702 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
2703# else
2704 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2705# endif
2706 SHWPTE PteDst;
2707
2708# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2709 /* Try to make the page writable if necessary. */
2710 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2711 && ( PGM_PAGE_IS_ZERO(pPage)
2712 || ( SHW_PTE_IS_RW(PteDstBase)
2713 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2714# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2715 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2716# endif
2717# ifdef VBOX_WITH_PAGE_SHARING
2718 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2719# endif
2720 && !PGM_PAGE_IS_BALLOONED(pPage))
2721 )
2722 )
2723 {
2724 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2725 AssertRCReturn(rc, rc);
2726 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2727 break;
2728 }
2729# endif
2730
2731 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2732 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2733 else if (PGM_PAGE_IS_BALLOONED(pPage))
2734 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2735 else
2736 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2737
2738 /* Only map writable pages writable. */
2739 if ( SHW_PTE_IS_P_RW(PteDst)
2740 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2741 {
2742 /* Still applies to shared pages. */
2743 Assert(!PGM_PAGE_IS_ZERO(pPage));
2744 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2745 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2746 }
2747
2748 if (SHW_PTE_IS_P(PteDst))
2749 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2750
2751 /* commit it (not atomic, new table) */
2752 pPTDst->a[iPTDst] = PteDst;
2753 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2754 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2755 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2756
2757 /* advance */
2758 GCPhys += GUEST_PAGE_SIZE;
2759 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2760# ifndef PGM_WITH_A20
2761 iHCPage++;
2762# endif
2763 iPTDst++;
2764 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2765 && GCPhys <= pRam->GCPhysLast);
2766
2767 /* Advance ram range list. */
2768 while (pRam && GCPhys > pRam->GCPhysLast)
2769 pRam = pRam->CTX_SUFF(pNext);
2770 }
2771 else if (pRam)
2772 {
2773 Log(("Invalid pages at %RGp\n", GCPhys));
2774 do
2775 {
2776 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2777 GCPhys += GUEST_PAGE_SIZE;
2778 iPTDst++;
2779 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2780 && GCPhys < pRam->GCPhys);
2781 PGM_A20_APPLY_TO_VAR(pVCpu,GCPhys);
2782 }
2783 else
2784 {
2785 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2786 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2787 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2788 }
2789 } /* while more PTEs */
2790 } /* 4KB / 4MB */
2791 }
2792 else
2793 AssertRelease(!SHW_PDE_IS_P(PdeDst));
2794
2795 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2796 if (RT_FAILURE(rc))
2797 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
2798 return rc;
2799
2800#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2801 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2802 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2803 && PGM_SHW_TYPE != PGM_TYPE_NONE
2804 NOREF(iPDSrc); NOREF(pPDSrc);
2805
2806 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2807
2808 /*
2809 * Validate input a little bit.
2810 */
2811 int rc = VINF_SUCCESS;
2812# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2813 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2814 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2815
2816 /* Fetch the pgm pool shadow descriptor. */
2817 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2818 Assert(pShwPde);
2819
2820# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2821 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2822 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2823 PX86PDPAE pPDDst;
2824 PSHWPDE pPdeDst;
2825
2826 /* Fetch the pgm pool shadow descriptor. */
2827 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2828 AssertRCSuccessReturn(rc, rc);
2829 Assert(pShwPde);
2830
2831 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2832 pPdeDst = &pPDDst->a[iPDDst];
2833
2834# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2835 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2836 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2837 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2838    PX86PDPT pPdptDst = NULL;   /* initialized to shut up gcc */
2839 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2840 AssertRCSuccessReturn(rc, rc);
2841 Assert(pPDDst);
2842 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2843
2844 /* Fetch the pgm pool shadow descriptor. */
2845 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2846 Assert(pShwPde);
2847
2848# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2849 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2850 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2851 PEPTPD pPDDst;
2852 PEPTPDPT pPdptDst;
2853
2854 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2855 if (rc != VINF_SUCCESS)
2856 {
2857 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2858 AssertRC(rc);
2859 return rc;
2860 }
2861 Assert(pPDDst);
2862 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2863
2864 /* Fetch the pgm pool shadow descriptor. */
2865 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
2866 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2867 Assert(pShwPde);
2868# endif
2869 SHWPDE PdeDst = *pPdeDst;
2870
2871 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts. */
2872
2873# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
2874 if ( BTH_IS_NP_ACTIVE(pVM)
2875 && !VM_IS_NEM_ENABLED(pVM)) /** @todo NEM: Large page support. */
2876 {
2877 /* Check if we allocated a big page before for this 2 MB range. */
2878 PPGMPAGE pPage;
2879 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
2880 if (RT_SUCCESS(rc))
2881 {
2882 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2883 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
2884 {
2885 if (PGM_A20_IS_ENABLED(pVCpu))
2886 {
2887 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
2888 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2889 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2890 }
2891 else
2892 {
2893 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
2894 pVM->pgm.s.cLargePagesDisabled++;
2895 }
2896 }
2897 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
2898 && PGM_A20_IS_ENABLED(pVCpu))
2899 {
2900 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
2901 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
2902 if (RT_SUCCESS(rc))
2903 {
2904 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2905 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2906 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2907 }
2908 }
2909 else if ( PGMIsUsingLargePages(pVM)
2910 && PGM_A20_IS_ENABLED(pVCpu))
2911 {
2912 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
2913 if (RT_SUCCESS(rc))
2914 {
2915 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2916 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2917 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2918 }
2919 else
2920 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
2921 }
2922
2923 if (HCPhys != NIL_RTHCPHYS)
2924 {
2925# if PGM_SHW_TYPE == PGM_TYPE_EPT
2926 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_MEMTYPE_WB
2927 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
2928# else
2929 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
2930 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
2931# endif
2932 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2933
2934 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
2935 /* Add a reference to the first page only. */
2936 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
2937
2938 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2939 return VINF_SUCCESS;
2940 }
2941 }
2942 }
2943# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
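    /*
     * A 2 MB shadow mapping is only possible when the guest-physical base of the range is
     * 2 MB aligned, which is what the X86_PDE2M_PAE_PG_MASK masking above guarantees.  A
     * small illustrative sketch of that alignment math (ad hoc names, not PGM API;
     * <stdint.h> and <stdbool.h> assumed):
     *
     *     #define MY_2M   UINT64_C(0x200000)
     *
     *     static inline bool     myIsLargePageAligned(uint64_t GCPhys) { return (GCPhys & (MY_2M - 1)) == 0; }
     *     static inline uint64_t myLargePageBase(uint64_t GCPhys)      { return GCPhys & ~(MY_2M - 1); }
     */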
2944
2945 /*
2946 * Allocate & map the page table.
2947 */
2948 PSHWPT pPTDst;
2949 PPGMPOOLPAGE pShwPage;
2950 RTGCPHYS GCPhys;
2951
2952 /* Virtual address = physical address */
2953 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
2954 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
2955 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
2956 &pShwPage);
2957 if ( rc == VINF_SUCCESS
2958 || rc == VINF_PGM_CACHED_PAGE)
2959 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2960 else
2961 {
2962 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2963 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2964 }
2965
2966 if (rc == VINF_SUCCESS)
2967 {
2968 /* New page table; fully set it up. */
2969 Assert(pPTDst);
2970
2971 /* Mask away the page offset. */
2972 GCPtrPage &= ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK;
2973
2974 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2975 {
2976 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2977 | (iPTDst << GUEST_PAGE_SHIFT));
2978
2979 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2980 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2981 GCPtrCurPage,
2982 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2983 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2984
2985 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2986 break;
2987 }
2988 }
2989 else
2990 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
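    /*
     * For real and protected mode guests the table filled in above is an identity mapping:
     * every PTE targets the guest-physical page at the same address as the virtual one.  A
     * stripped-down sketch of that loop, assuming 4 KB pages, a 512-entry table and
     * illustrative names for the array and the PTE flags:
     *
     *     for (unsigned i = 0; i < 512; i++)
     *         aPte[i] = (uBase + (uint64_t)i * 0x1000)                  // physical == virtual
     *                 | MY_PTE_P | MY_PTE_RW | MY_PTE_US | MY_PTE_A;
     *
     * The real code goes through SyncPageWorker instead so that access handlers and other
     * special pages still get the proper treatment.
     */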
2991
2992 /* Save the new PDE. */
2993# if PGM_SHW_TYPE == PGM_TYPE_EPT
2994 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
2995 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
2996# else
2997 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
2998 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
2999# endif
3000 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3001
3002 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3003 if (RT_FAILURE(rc))
3004 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3005 return rc;
3006
3007#else
3008 NOREF(iPDSrc); NOREF(pPDSrc);
3009 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3010 return VERR_PGM_NOT_USED_IN_MODE;
3011#endif
3012}
3013
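/*
 * The SHW_PD_SHIFT / SHW_PD_MASK / SHW_PT_SHIFT arithmetic used throughout SyncPT boils
 * down to slicing a linear address into table indexes.  A self-contained sketch with the
 * plain 32-bit numbers (1024-entry directory and tables, 4 KB pages); the helper name is
 * illustrative only:
 *
 *     static void mySplitLinear32(uint32_t uAddr, unsigned *piPd, unsigned *piPt, unsigned *poff)
 *     {
 *         *piPd = (uAddr >> 22) & 0x3ff;   // cf. (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK
 *         *piPt = (uAddr >> 12) & 0x3ff;   // cf. iPTDst and SHW_PT_SHIFT
 *         *poff = uAddr & 0xfff;           // byte offset within the 4 KB page
 *     }
 *
 * PAE, AMD64 and EPT shadow modes use 512-entry tables, which is exactly the difference
 * the SHW_* macros abstract away.
 */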
3014
3015
3016/**
3017 * Prefetch a page/set of pages.
3018 *
3019 * Typically used to sync commonly used pages before entering raw mode
3020 * after a CR3 reload.
3021 *
3022 * @returns VBox status code.
3023 * @param pVCpu The cross context virtual CPU structure.
3024 * @param GCPtrPage Page to prefetch.
3025 */
3026PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
3027{
3028#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3029 || PGM_GST_TYPE == PGM_TYPE_REAL \
3030 || PGM_GST_TYPE == PGM_TYPE_PROT \
3031 || PGM_GST_TYPE == PGM_TYPE_PAE \
3032 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3033 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3034 && PGM_SHW_TYPE != PGM_TYPE_NONE
3035 /*
3036 * Check that all guest levels through the PDE are present, getting the
3037 * PD and PDE in the process.
3038 */
3039 int rc = VINF_SUCCESS;
3040# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3041# if PGM_GST_TYPE == PGM_TYPE_32BIT
3042 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3043 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3044# elif PGM_GST_TYPE == PGM_TYPE_PAE
3045 unsigned iPDSrc;
3046 X86PDPE PdpeSrc;
3047 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3048 if (!pPDSrc)
3049 return VINF_SUCCESS; /* not present */
3050# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3051 unsigned iPDSrc;
3052 PX86PML4E pPml4eSrc;
3053 X86PDPE PdpeSrc;
3054 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3055 if (!pPDSrc)
3056 return VINF_SUCCESS; /* not present */
3057# endif
3058 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3059# else
3060 PGSTPD pPDSrc = NULL;
3061 const unsigned iPDSrc = 0;
3062 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3063# endif
3064
3065 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
3066 {
3067 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3068 PGM_LOCK_VOID(pVM);
3069
3070# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3071 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3072# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3073 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3074 PX86PDPAE pPDDst;
3075 X86PDEPAE PdeDst;
3076# if PGM_GST_TYPE != PGM_TYPE_PAE
3077 X86PDPE PdpeSrc;
3078
3079 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3080 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3081# endif
3082 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3083 if (rc != VINF_SUCCESS)
3084 {
3085 PGM_UNLOCK(pVM);
3086 AssertRC(rc);
3087 return rc;
3088 }
3089 Assert(pPDDst);
3090 PdeDst = pPDDst->a[iPDDst];
3091
3092# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3093 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3094 PX86PDPAE pPDDst;
3095 X86PDEPAE PdeDst;
3096
3097# if PGM_GST_TYPE == PGM_TYPE_PROT
3098 /* AMD-V nested paging */
3099 X86PML4E Pml4eSrc;
3100 X86PDPE PdpeSrc;
3101 PX86PML4E pPml4eSrc = &Pml4eSrc;
3102
3103 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3104 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3105 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3106# endif
3107
3108 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3109 if (rc != VINF_SUCCESS)
3110 {
3111 PGM_UNLOCK(pVM);
3112 AssertRC(rc);
3113 return rc;
3114 }
3115 Assert(pPDDst);
3116 PdeDst = pPDDst->a[iPDDst];
3117# endif
3118 if (!(PdeDst.u & X86_PDE_P))
3119 {
3120 /** @todo r=bird: This guy will set the A bit on the PDE,
3121 * probably harmless. */
3122 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3123 }
3124 else
3125 {
3126 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3127 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3128 * makes no sense to prefetch more than one page.
3129 */
3130 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3131 if (RT_SUCCESS(rc))
3132 rc = VINF_SUCCESS;
3133 }
3134 PGM_UNLOCK(pVM);
3135 }
3136 return rc;
3137
3138#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3139 NOREF(pVCpu); NOREF(GCPtrPage);
3140 return VINF_SUCCESS; /* ignore */
3141#else
3142 AssertCompile(0);
3143#endif
3144}
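/*
 * This template is normally reached through the mode-dispatched PGMPrefetchPage() wrapper
 * rather than being called directly.  A hedged usage sketch -- the wrapper name and exact
 * signature are assumptions to be checked against PGMAll.cpp / include/VBox/vmm/pgm.h:
 *
 *     // Warm the shadow structures for a guest linear address, e.g. after a CR3 reload.
 *     int rc = PGMPrefetchPage(pVCpu, GCPtrPage);
 *     if (RT_FAILURE(rc))
 *         Log(("Prefetch of %RGv failed: %Rrc\n", GCPtrPage, rc));
 */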
3145
3146
3147
3148
3149/**
3150 * Syncs a page during a PGMVerifyAccess() call.
3151 *
3152 * @returns VBox status code (informational included).
3153 * @param pVCpu The cross context virtual CPU structure.
3154 * @param GCPtrPage The address of the page to sync.
3155 * @param fPage The effective guest page flags.
3156 * @param uErr The trap error code.
3157 * @remarks This will normally never be called on invalid guest page
3158 * translation entries.
3159 */
3160PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3161{
3162 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3163
3164 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3165 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
3166
3167 Assert(!pVM->pgm.s.fNestedPaging);
3168#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3169 || PGM_GST_TYPE == PGM_TYPE_REAL \
3170 || PGM_GST_TYPE == PGM_TYPE_PROT \
3171 || PGM_GST_TYPE == PGM_TYPE_PAE \
3172 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3173 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3174 && PGM_SHW_TYPE != PGM_TYPE_NONE
3175
3176 /*
3177 * Get guest PD and index.
3178 */
3179 /** @todo Performance: We did all this just a jiffy ago in the
3180 * PGMGstGetPage call. */
3181# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3182# if PGM_GST_TYPE == PGM_TYPE_32BIT
3183 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3184 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3185
3186# elif PGM_GST_TYPE == PGM_TYPE_PAE
3187 unsigned iPDSrc = 0;
3188 X86PDPE PdpeSrc;
3189 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3190 if (RT_UNLIKELY(!pPDSrc))
3191 {
3192 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3193 return VINF_EM_RAW_GUEST_TRAP;
3194 }
3195
3196# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3197 unsigned iPDSrc = 0; /* shut up gcc */
3198 PX86PML4E pPml4eSrc = NULL; /* ditto */
3199 X86PDPE PdpeSrc;
3200 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3201 if (RT_UNLIKELY(!pPDSrc))
3202 {
3203 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3204 return VINF_EM_RAW_GUEST_TRAP;
3205 }
3206# endif
3207
3208# else /* !PGM_WITH_PAGING */
3209 PGSTPD pPDSrc = NULL;
3210 const unsigned iPDSrc = 0;
3211# endif /* !PGM_WITH_PAGING */
3212 int rc = VINF_SUCCESS;
3213
3214 PGM_LOCK_VOID(pVM);
3215
3216 /*
3217 * First check if the shadow PD is present.
3218 */
3219# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3220 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3221
3222# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3223 PX86PDEPAE pPdeDst;
3224 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3225 PX86PDPAE pPDDst;
3226# if PGM_GST_TYPE != PGM_TYPE_PAE
3227 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3228 X86PDPE PdpeSrc;
3229 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3230# endif
3231 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3232 if (rc != VINF_SUCCESS)
3233 {
3234 PGM_UNLOCK(pVM);
3235 AssertRC(rc);
3236 return rc;
3237 }
3238 Assert(pPDDst);
3239 pPdeDst = &pPDDst->a[iPDDst];
3240
3241# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3242 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3243 PX86PDPAE pPDDst;
3244 PX86PDEPAE pPdeDst;
3245
3246# if PGM_GST_TYPE == PGM_TYPE_PROT
3247 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3248 X86PML4E Pml4eSrc;
3249 X86PDPE PdpeSrc;
3250 PX86PML4E pPml4eSrc = &Pml4eSrc;
3251 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3252 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3253# endif
3254
3255 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3256 if (rc != VINF_SUCCESS)
3257 {
3258 PGM_UNLOCK(pVM);
3259 AssertRC(rc);
3260 return rc;
3261 }
3262 Assert(pPDDst);
3263 pPdeDst = &pPDDst->a[iPDDst];
3264# endif
3265
3266 if (!(pPdeDst->u & X86_PDE_P))
3267 {
3268 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3269 if (rc != VINF_SUCCESS)
3270 {
3271 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3272 PGM_UNLOCK(pVM);
3273 AssertRC(rc);
3274 return rc;
3275 }
3276 }
3277
3278# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3279 /* Check for dirty bit fault */
3280 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3281 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3282 Log(("PGMVerifyAccess: success (dirty)\n"));
3283 else
3284# endif
3285 {
3286# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3287 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3288# else
3289 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3290# endif
3291
3292 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3293 if (uErr & X86_TRAP_PF_US)
3294 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
3295 else /* supervisor */
3296 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3297
3298 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3299 if (RT_SUCCESS(rc))
3300 {
3301 /* Page was successfully synced */
3302 Log2(("PGMVerifyAccess: success (sync)\n"));
3303 rc = VINF_SUCCESS;
3304 }
3305 else
3306 {
3307 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3308 rc = VINF_EM_RAW_GUEST_TRAP;
3309 }
3310 }
3311 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3312 PGM_UNLOCK(pVM);
3313 return rc;
3314
3315#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3316
3317 AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3318 return VERR_PGM_NOT_USED_IN_MODE;
3319#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3320}
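/*
 * The uErr parameter above is the architectural x86 page-fault error code.  The bits
 * consulted here and in CheckDirtyPageFault are fixed by the architecture; a small
 * reference sketch (ad hoc names, values matching the X86_TRAP_PF_* defines):
 *
 *     #define MY_PF_P     UINT32_C(0x01)   // 0 = not-present fault, 1 = protection violation
 *     #define MY_PF_RW    UINT32_C(0x02)   // set for write accesses
 *     #define MY_PF_US    UINT32_C(0x04)   // set for user-mode accesses (cf. X86_TRAP_PF_US)
 *     #define MY_PF_RSVD  UINT32_C(0x08)   // reserved bit set in a paging-structure entry
 *     #define MY_PF_ID    UINT32_C(0x10)   // instruction fetch
 *
 *     static inline bool myIsUserFault(uint32_t uErr) { return (uErr & MY_PF_US) != 0; }
 */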
3321
3322
3323/**
3324 * Syncs the paging hierarchy starting at CR3.
3325 *
3326 * @returns VBox status code. R0/RC may return VINF_PGM_SYNC_CR3 but no other
3327 * informational status codes.
3328 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
3329 * the VMM into guest context.
3330 * @param pVCpu The cross context virtual CPU structure.
3331 * @param cr0 Guest context CR0 register.
3332 * @param cr3 Guest context CR3 register. Not subjected to the A20
3333 * mask.
3334 * @param cr4 Guest context CR4 register.
3335 * @param fGlobal Whether to include global page directories or not.
3336 */
3337PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3338{
3339 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3340 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
3341
3342 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
3343
3344#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3345# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3346 PGM_LOCK_VOID(pVM);
3347 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3348 if (pPool->cDirtyPages)
3349 pgmPoolResetDirtyPages(pVM);
3350 PGM_UNLOCK(pVM);
3351# endif
3352#endif /* !NESTED && !EPT */
3353
3354#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3355 /*
3356 * Nested / EPT / None - No work.
3357 */
3358 return VINF_SUCCESS;
3359
3360#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3361 /*
3362 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3363 * out the shadow parts when the guest modifies its tables.
3364 */
3365 return VINF_SUCCESS;
3366
3367#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3368
3369 return VINF_SUCCESS;
3370#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3371}
3372
3373
3374
3375
3376#ifdef VBOX_STRICT
3377
3378/**
3379 * Checks that the shadow page table is in sync with the guest one.
3380 *
3381 * @returns The number of errors.
3382 * @param pVCpu The cross context virtual CPU structure.
3383 * @param cr3 Guest context CR3 register.
3384 * @param cr4 Guest context CR4 register.
3385 * @param GCPtr Where to start. Defaults to 0.
3386 * @param cb How much to check. Defaults to everything.
3387 */
3388PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3389{
3390 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
3391#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3392 return 0;
3393#else
3394 unsigned cErrors = 0;
3395 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3396 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3397
3398# if PGM_GST_TYPE == PGM_TYPE_PAE
3399 /** @todo currently broken; crashes below somewhere */
3400 AssertFailed();
3401# endif
3402
3403# if PGM_GST_TYPE == PGM_TYPE_32BIT \
3404 || PGM_GST_TYPE == PGM_TYPE_PAE \
3405 || PGM_GST_TYPE == PGM_TYPE_AMD64
3406
3407 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3408 PPGMCPU pPGM = &pVCpu->pgm.s;
3409 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3410 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3411# ifndef IN_RING0
3412 RTHCPHYS HCPhys; /* general usage. */
3413# endif
3414 int rc;
3415
3416 /*
3417 * Check that the Guest CR3 and all its mappings are correct.
3418 */
3419 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
3420 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3421 false);
3422# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3423# if 0
3424# if PGM_GST_TYPE == PGM_TYPE_32BIT
3425 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3426# else
3427 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3428# endif
3429 AssertRCReturn(rc, 1);
3430 HCPhys = NIL_RTHCPHYS;
3431 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
3432 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3433# endif
3434# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3435 pgmGstGet32bitPDPtr(pVCpu);
3436 RTGCPHYS GCPhys;
3437 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
3438 AssertRCReturn(rc, 1);
3439 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3440# endif
3441# endif /* !IN_RING0 */
3442
3443 /*
3444 * Get and check the Shadow CR3.
3445 */
3446# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3447 unsigned cPDEs = X86_PG_ENTRIES;
3448 unsigned cIncrement = X86_PG_ENTRIES * GUEST_PAGE_SIZE;
3449# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3450# if PGM_GST_TYPE == PGM_TYPE_32BIT
3451 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3452# else
3453 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3454# endif
3455 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3456# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3457 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3458 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3459# endif
3460 if (cb != ~(RTGCPTR)0)
3461 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3462
3463/** @todo call the other two PGMAssert*() functions. */
3464
3465# if PGM_GST_TYPE == PGM_TYPE_AMD64
3466 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3467
3468 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3469 {
3470 PPGMPOOLPAGE pShwPdpt = NULL;
3471 PX86PML4E pPml4eSrc;
3472 PX86PML4E pPml4eDst;
3473 RTGCPHYS GCPhysPdptSrc;
3474
3475 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3476 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3477
3478 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3479 if (!(pPml4eDst->u & X86_PML4E_P))
3480 {
3481 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3482 continue;
3483 }
3484
3485 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3486 GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
3487
3488 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
3489 {
3490 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3491 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3492 cErrors++;
3493 continue;
3494 }
3495
3496 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3497 {
3498 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3499 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3500 cErrors++;
3501 continue;
3502 }
3503
3504 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
3505 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
3506 {
3507 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3508 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3509 cErrors++;
3510 continue;
3511 }
3512# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3513 {
3514# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3515
3516# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3517 /*
3518 * Check the PDPTEs too.
3519 */
3520 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3521
3522 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3523 {
3524 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3525 PPGMPOOLPAGE pShwPde = NULL;
3526 PX86PDPE pPdpeDst;
3527 RTGCPHYS GCPhysPdeSrc;
3528 X86PDPE PdpeSrc;
3529 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3530# if PGM_GST_TYPE == PGM_TYPE_PAE
3531 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3532 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3533# else
3534 PX86PML4E pPml4eSrcIgn;
3535 PX86PDPT pPdptDst;
3536 PX86PDPAE pPDDst;
3537 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3538
3539 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3540 if (rc != VINF_SUCCESS)
3541 {
3542 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3543 GCPtr += 512 * _2M;
3544 continue; /* next PDPTE */
3545 }
3546 Assert(pPDDst);
3547# endif
3548 Assert(iPDSrc == 0);
3549
3550 pPdpeDst = &pPdptDst->a[iPdpt];
3551
3552 if (!(pPdpeDst->u & X86_PDPE_P))
3553 {
3554 GCPtr += 512 * _2M;
3555 continue; /* next PDPTE */
3556 }
3557
3558 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3559 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
3560
3561 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
3562 {
3563 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3564 GCPtr += 512 * _2M;
3565 cErrors++;
3566 continue;
3567 }
3568
3569 if (GCPhysPdeSrc != pShwPde->GCPhys)
3570 {
3571# if PGM_GST_TYPE == PGM_TYPE_AMD64
3572 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3573# else
3574 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3575# endif
3576 GCPtr += 512 * _2M;
3577 cErrors++;
3578 continue;
3579 }
3580
3581# if PGM_GST_TYPE == PGM_TYPE_AMD64
3582 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
3583 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
3584 {
3585 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3586 GCPtr += 512 * _2M;
3587 cErrors++;
3588 continue;
3589 }
3590# endif
3591
3592# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3593 {
3594# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3595# if PGM_GST_TYPE == PGM_TYPE_32BIT
3596 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3597# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3598 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3599# endif
3600# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3601 /*
3602 * Iterate the shadow page directory.
3603 */
3604 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3605 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3606
3607 for (;
3608 iPDDst < cPDEs;
3609 iPDDst++, GCPtr += cIncrement)
3610 {
3611# if PGM_SHW_TYPE == PGM_TYPE_PAE
3612 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3613# else
3614 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3615# endif
3616 if ( (PdeDst.u & X86_PDE_P)
3617 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
3618 {
3619 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3620 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3621 if (!pPoolPage)
3622 {
3623 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3624 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3625 cErrors++;
3626 continue;
3627 }
3628 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3629
3630 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3631 {
3632 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3633 GCPtr, (uint64_t)PdeDst.u));
3634 cErrors++;
3635 }
3636
3637 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3638 {
3639 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3640 GCPtr, (uint64_t)PdeDst.u));
3641 cErrors++;
3642 }
3643
3644 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3645 if (!(PdeSrc.u & X86_PDE_P))
3646 {
3647 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3648 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3649 cErrors++;
3650 continue;
3651 }
3652
3653 if ( !(PdeSrc.u & X86_PDE_PS)
3654 || !fBigPagesSupported)
3655 {
3656 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3657# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3658 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
3659# endif
3660 }
3661 else
3662 {
3663# if PGM_GST_TYPE == PGM_TYPE_32BIT
3664 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3665 {
3666 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3667 GCPtr, (uint64_t)PdeSrc.u));
3668 cErrors++;
3669 continue;
3670 }
3671# endif
3672 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3673# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3674 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
3675# endif
3676 }
3677
3678 if ( pPoolPage->enmKind
3679 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3680 {
3681 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3682 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3683 cErrors++;
3684 }
3685
3686 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3687 if (!pPhysPage)
3688 {
3689 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3690 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3691 cErrors++;
3692 continue;
3693 }
3694
3695 if (GCPhysGst != pPoolPage->GCPhys)
3696 {
3697 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3698 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3699 cErrors++;
3700 continue;
3701 }
3702
3703 if ( !(PdeSrc.u & X86_PDE_PS)
3704 || !fBigPagesSupported)
3705 {
3706 /*
3707 * Page Table.
3708 */
3709 const GSTPT *pPTSrc;
3710 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(GUEST_PAGE_SIZE - 1)),
3711 &pPTSrc);
3712 if (RT_FAILURE(rc))
3713 {
3714 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3715 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3716 cErrors++;
3717 continue;
3718 }
3719 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3720 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3721 {
3722 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3723 // (This problem will go away when/if we shadow multiple CR3s.)
3724 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3725 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3726 cErrors++;
3727 continue;
3728 }
3729 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3730 {
3731 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3732 GCPtr, (uint64_t)PdeDst.u));
3733 cErrors++;
3734 continue;
3735 }
3736
3737 /* iterate the page table. */
3738# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3739 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3740 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3741# else
3742 const unsigned offPTSrc = 0;
3743# endif
3744 for (unsigned iPT = 0, off = 0;
3745 iPT < RT_ELEMENTS(pPTDst->a);
3746 iPT++, off += GUEST_PAGE_SIZE)
3747 {
3748 const SHWPTE PteDst = pPTDst->a[iPT];
3749
3750 /* skip not-present and dirty tracked entries. */
3751 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3752 continue;
3753 Assert(SHW_PTE_IS_P(PteDst));
3754
3755 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3756 if (!(PteSrc.u & X86_PTE_P))
3757 {
3758# ifdef IN_RING3
3759 PGMAssertHandlerAndFlagsInSync(pVM);
3760 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
3761 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
3762 0, 0, UINT64_MAX, 99, NULL);
3763# endif
3764 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3765 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3766 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
3767 cErrors++;
3768 continue;
3769 }
3770
3771 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3772# if 1 /** @todo sync accessed bit properly... */
3773 fIgnoreFlags |= X86_PTE_A;
3774# endif
3775
3776 /* match the physical addresses */
3777 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3778 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
3779
3780# ifdef IN_RING3
3781 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3782 if (RT_FAILURE(rc))
3783 {
3784 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3785 {
3786 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3787 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3788 cErrors++;
3789 continue;
3790 }
3791 }
3792 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3793 {
3794 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3795 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3796 cErrors++;
3797 continue;
3798 }
3799# endif
3800
3801 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3802 if (!pPhysPage)
3803 {
3804# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3805 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3806 {
3807 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3808 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3809 cErrors++;
3810 continue;
3811 }
3812# endif
3813 if (SHW_PTE_IS_RW(PteDst))
3814 {
3815 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3816 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3817 cErrors++;
3818 }
3819 fIgnoreFlags |= X86_PTE_RW;
3820 }
3821 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3822 {
3823 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3824 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3825 cErrors++;
3826 continue;
3827 }
3828
3829 /* flags */
3830 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3831 {
3832 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3833 {
3834 if (SHW_PTE_IS_RW(PteDst))
3835 {
3836 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3837 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3838 cErrors++;
3839 continue;
3840 }
3841 fIgnoreFlags |= X86_PTE_RW;
3842 }
3843 else
3844 {
3845 if ( SHW_PTE_IS_P(PteDst)
3846# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3847 && !PGM_PAGE_IS_MMIO(pPhysPage)
3848# endif
3849 )
3850 {
3851 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3852 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3853 cErrors++;
3854 continue;
3855 }
3856 fIgnoreFlags |= X86_PTE_P;
3857 }
3858 }
3859 else
3860 {
3861 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
3862 {
3863 if (SHW_PTE_IS_RW(PteDst))
3864 {
3865 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3866 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3867 cErrors++;
3868 continue;
3869 }
3870 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
3871 {
3872 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3873 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3874 cErrors++;
3875 continue;
3876 }
3877 if (SHW_PTE_IS_D(PteDst))
3878 {
3879 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3880 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3881 cErrors++;
3882 }
3883# if 0 /** @todo sync access bit properly... */
3884 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3885 {
3886 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3887 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3888 cErrors++;
3889 }
3890 fIgnoreFlags |= X86_PTE_RW;
3891# else
3892 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3893# endif
3894 }
3895 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
3896 {
3897 /* access bit emulation (not implemented). */
3898 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
3899 {
3900 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3901 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3902 cErrors++;
3903 continue;
3904 }
3905 if (!SHW_PTE_IS_A(PteDst))
3906 {
3907 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3908 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3909 cErrors++;
3910 }
3911 fIgnoreFlags |= X86_PTE_P;
3912 }
3913# ifdef DEBUG_sandervl
3914 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3915# endif
3916 }
3917
3918 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
3919 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
3920 )
3921 {
3922 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3923 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
3924 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3925 cErrors++;
3926 continue;
3927 }
3928 } /* foreach PTE */
3929 }
3930 else
3931 {
3932 /*
3933 * Big Page.
3934 */
3935 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3936 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
3937 {
3938 if (PdeDst.u & X86_PDE_RW)
3939 {
3940 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3941 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3942 cErrors++;
3943 continue;
3944 }
3945 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3946 {
3947 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3948 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3949 cErrors++;
3950 continue;
3951 }
3952# if 0 /** @todo sync access bit properly... */
3953 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3954 {
3955 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3956 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3957 cErrors++;
3958 }
3959 fIgnoreFlags |= X86_PTE_RW;
3960# else
3961 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3962# endif
3963 }
3964 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3965 {
3966 /* access bit emulation (not implemented). */
3967 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
3968 {
3969 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3970 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3971 cErrors++;
3972 continue;
3973 }
3974 if (!SHW_PDE_IS_A(PdeDst))
3975 {
3976 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3977 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3978 cErrors++;
3979 }
3980 fIgnoreFlags |= X86_PTE_P;
3981 }
3982
3983 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3984 {
3985 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3986 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3987 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3988 cErrors++;
3989 }
3990
3991 /* iterate the page table. */
3992 for (unsigned iPT = 0, off = 0;
3993 iPT < RT_ELEMENTS(pPTDst->a);
3994 iPT++, off += GUEST_PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + GUEST_PAGE_SIZE))
3995 {
3996 const SHWPTE PteDst = pPTDst->a[iPT];
3997
3998 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
3999 {
4000 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4001 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4002 cErrors++;
4003 }
4004
4005 /* skip not-present entries. */
4006 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4007 continue;
4008
4009 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4010
4011 /* match the physical addresses */
4012 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4013
4014# ifdef IN_RING3
4015 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4016 if (RT_FAILURE(rc))
4017 {
4018 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4019 {
4020 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4021 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4022 cErrors++;
4023 }
4024 }
4025 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4026 {
4027 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4028 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4029 cErrors++;
4030 continue;
4031 }
4032# endif
4033 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4034 if (!pPhysPage)
4035 {
4036# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4037 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4038 {
4039 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4040 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4041 cErrors++;
4042 continue;
4043 }
4044# endif
4045 if (SHW_PTE_IS_RW(PteDst))
4046 {
4047 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4048 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4049 cErrors++;
4050 }
4051 fIgnoreFlags |= X86_PTE_RW;
4052 }
4053 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4054 {
4055 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4056 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4057 cErrors++;
4058 continue;
4059 }
4060
4061 /* flags */
4062 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4063 {
4064 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4065 {
4066 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4067 {
4068 if (SHW_PTE_IS_RW(PteDst))
4069 {
4070 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4071 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4072 cErrors++;
4073 continue;
4074 }
4075 fIgnoreFlags |= X86_PTE_RW;
4076 }
4077 }
4078 else
4079 {
4080 if ( SHW_PTE_IS_P(PteDst)
4081# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4082 && !PGM_PAGE_IS_MMIO(pPhysPage)
4083# endif
4084 )
4085 {
4086 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4087 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4088 cErrors++;
4089 continue;
4090 }
4091 fIgnoreFlags |= X86_PTE_P;
4092 }
4093 }
4094
4095 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4096 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4097 )
4098 {
4099 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4100 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4101 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4102 cErrors++;
4103 continue;
4104 }
4105 } /* for each PTE */
4106 }
4107 }
4108 /* not present */
4109
4110 } /* for each PDE */
4111
4112 } /* for each PDPTE */
4113
4114 } /* for each PML4E */
4115
4116# ifdef DEBUG
4117 if (cErrors)
4118 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4119# endif
4120# endif /* GST is in {32BIT, PAE, AMD64} */
4121 return cErrors;
4122#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
4123}
4124#endif /* VBOX_STRICT */
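/*
 * The AssertCR3 checks above repeatedly compare a guest entry against its shadow while
 * masking out bits that are allowed to differ (physical address bits, A/D when dirty
 * tracking is active, PWT/PCD and so on).  A minimal sketch of that idiom with an
 * illustrative helper name:
 *
 *     static inline bool myEntriesMatchIgnoring(uint64_t uSrc, uint64_t uDst, uint64_t fIgnore)
 *     {
 *         return (uSrc & ~fIgnore) == (uDst & ~fIgnore);
 *     }
 */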
4125
4126
4127/**
4128 * Sets up the CR3 for shadow paging.
4129 *
4130 * @returns Strict VBox status code.
4131 * @retval VINF_SUCCESS.
4132 *
4133 * @param pVCpu The cross context virtual CPU structure.
4134 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
4135 * already applied.)
4136 */
4137PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
4138{
4139 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4140 int rc = VINF_SUCCESS;
4141
4142 /* Update guest paging info. */
4143#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4144 || PGM_GST_TYPE == PGM_TYPE_PAE \
4145 || PGM_GST_TYPE == PGM_TYPE_AMD64
4146
4147 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4148 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4149
4150# if PGM_GST_TYPE == PGM_TYPE_PAE
4151 if (!pVCpu->pgm.s.CTX_SUFF(fPaePdpesAndCr3Mapped))
4152# endif
4153 {
4154 /*
4155 * Map the page CR3 points at.
4156 */
4157 RTHCPTR HCPtrGuestCR3;
4158 rc = pgmGstMapCr3(pVCpu, GCPhysCR3, &HCPtrGuestCR3);
4159 if (RT_SUCCESS(rc))
4160 {
4161# if PGM_GST_TYPE == PGM_TYPE_32BIT
4162# ifdef IN_RING3
4163 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
4164 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
4165# else
4166 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
4167 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
4168# endif
4169
4170# elif PGM_GST_TYPE == PGM_TYPE_PAE
4171# ifdef IN_RING3
4172 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
4173 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
4174# else
4175 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
4176 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
4177# endif
4178
4179 /*
4180 * Update CPUM and map the 4 PDs too.
4181 */
4182 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
4183 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
4184 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
4185 PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
4186
4187# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4188# ifdef IN_RING3
4189 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
4190 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
4191# else
4192 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
4193 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
4194# endif
4195# endif
4196 }
4197 else
4198 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4199 }
4200
4201 /*
4202 * Reset fPaePdpesAndCr3Mapped for all modes as there's no guarantee that
4203 * we were called in the correct sequence of PAE followed by other modes
4204 * without CR3 changing in between.
4205 */
4206 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4207 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4208#endif
4209
4210 /*
4211 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
4212 */
4213# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4214 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4215 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4216 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4217 && PGM_GST_TYPE != PGM_TYPE_PROT))
4218
4219 Assert(!pVM->pgm.s.fNestedPaging);
4220 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4221
4222 /*
4223 * Update the shadow root page as well since that's not fixed.
4224 */
4225 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4226 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4227 PPGMPOOLPAGE pNewShwPageCR3;
4228
4229 PGM_LOCK_VOID(pVM);
4230
4231# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4232 if (pPool->cDirtyPages)
4233 pgmPoolResetDirtyPages(pVM);
4234# endif
4235
4236 Assert(!(GCPhysCR3 >> (GUEST_PAGE_SHIFT + 32))); /** @todo what is this for? */
4237 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
4238 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
4239 AssertFatalRC(rc2);
4240
4241 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4242# ifdef IN_RING0
4243 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4244# else
4245 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4246# endif
4247
4248 /* Set the current hypervisor CR3. */
4249 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4250
4251 /* Clean up the old CR3 root. */
4252 if ( pOldShwPageCR3
4253 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4254 {
4255 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4256
4257 /* Mark the page as unlocked; allow flushing again. */
4258 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4259
4260 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
4261 }
4262 PGM_UNLOCK(pVM);
4263# else
4264 NOREF(GCPhysCR3);
4265# endif
4266
4267 return rc;
4268}
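/*
 * The GST_CR3_PAGE_MASK used above differs per guest paging mode because the architecture
 * aligns the top-level structure differently.  An illustrative sketch of the architectural
 * masking (variable names ad hoc, MAXPHYADDR assumed to be 52 bits):
 *
 *     uint64_t GCPhysPd   = uCr3 & 0xfffff000u;                  // 32-bit: 4 KB aligned page directory
 *     uint64_t GCPhysPdpt = uCr3 & 0xffffffe0u;                  // PAE: 32-byte aligned PDPT
 *     uint64_t GCPhysPml4 = uCr3 & UINT64_C(0x000ffffffffff000); // long mode: 4 KB aligned PML4
 */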
4269
4270/**
4271 * Unmaps the shadow CR3.
4272 *
4273 * @returns VBox status, no specials.
4274 * @param pVCpu The cross context virtual CPU structure.
4275 */
4276PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
4277{
4278 LogFlow(("UnmapCR3\n"));
4279
4280 int rc = VINF_SUCCESS;
4281 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4282
4283 /*
4284 * Update guest paging info.
4285 */
4286#if PGM_GST_TYPE == PGM_TYPE_32BIT
4287 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4288 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4289
4290#elif PGM_GST_TYPE == PGM_TYPE_PAE
4291 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4292 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4293 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4294 {
4295 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4296 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4297 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4298 }
4299
4300#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4301 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4302 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4303
4304#else /* prot/real mode stub */
4305 /* nothing to do */
4306#endif
4307
4308 /*
4309 * Update second-level address translation info.
4310 */
4311#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4312 pVCpu->pgm.s.pGstEptPml4R3 = 0;
4313 pVCpu->pgm.s.pGstEptPml4R0 = 0;
4314#endif
4315
4316 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4317 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4318
4319 /*
4320 * Update shadow paging info.
4321 */
4322#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4323 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4324 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4325# if PGM_GST_TYPE != PGM_TYPE_REAL
4326 Assert(!pVM->pgm.s.fNestedPaging);
4327# endif
4328 PGM_LOCK_VOID(pVM);
4329
4330 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4331 {
4332 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4333
4334# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4335 if (pPool->cDirtyPages)
4336 pgmPoolResetDirtyPages(pVM);
4337# endif
4338
4339 /* Mark the page as unlocked; allow flushing again. */
4340 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4341
4342 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
4343 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4344 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4345 }
4346
4347 PGM_UNLOCK(pVM);
4348#endif
4349
4350 return rc;
4351}
4352