VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 22137

Last change on this file since 22137 was 21966, checked in by vboxsync, 15 years ago

Workaround for fatal cached page table free.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 195.5 KB
1/* $Id: PGMAllBth.h 21966 2009-08-04 16:47:38Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
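/* (Editor's note, not part of the original source: this header is a template.
 * It is included multiple times with different PGM_GST_TYPE / PGM_SHW_TYPE
 * definitions, producing one Trap0eHandler/InvalidatePage/SyncPage/etc.
 * instance per guest/shadow paging mode combination via the PGM_BTH_NAME and
 * PGM_BTH_DECL macros; hence the heavy #if machinery below.) */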
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
44RT_C_DECLS_END
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 *
78 * @param pVCpu VMCPU Handle.
79 * @param uErr The trap error code.
80 * @param pRegFrame Trap register frame.
81 * @param pvFault The fault address.
82 */
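/* (Overview, not part of the original source; summarized from the code of this
 * function:
 *  1. Walk the guest paging structures for pvFault and reflect genuine guest
 *     faults (missing guest PD/PDPT/PML4) straight back to the guest.
 *  2. Let CheckPageFault handle accessed/dirty-bit emulation faults.
 *  3. Lazily create a missing shadow page table via SyncPT.
 *  4. Check hypervisor mappings and physical/virtual access handlers.
 *  5. Otherwise treat the page as out of sync and resync it with SyncPage,
 *     or hand the fault to the guest or the recompiler.) */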
83PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
84{
85 PVM pVM = pVCpu->CTX_SUFF(pVM);
86
87# if defined(IN_RC) && defined(VBOX_STRICT)
88 PGMDynCheckLocks(pVM);
89# endif
90
91# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
92 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
93 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
94
95# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
96 /*
97 * Hide the instruction fetch trap indicator for now.
98 */
99 /** @todo NXE will change this and we must fix NXE in the switcher too! */
100 if (uErr & X86_TRAP_PF_ID)
101 {
102 uErr &= ~X86_TRAP_PF_ID;
103 TRPMSetErrorCode(pVCpu, uErr);
104 }
105# endif
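 /* (Background note, not part of the original source: X86_TRAP_PF_ID is bit 4 of
    the #PF error code and marks an instruction-fetch fault; it is only produced
    when no-execute is in effect, which a non-PAE guest cannot see, so it is
    stripped here before the error code is passed along.) */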
106
107 /*
108 * Get PDs.
109 */
110 int rc;
111# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
112# if PGM_GST_TYPE == PGM_TYPE_32BIT
113 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
114 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
115
116# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
117
118# if PGM_GST_TYPE == PGM_TYPE_PAE
119 unsigned iPDSrc = 0; /* initialized to shut up gcc */
120 X86PDPE PdpeSrc;
121 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
122
123# elif PGM_GST_TYPE == PGM_TYPE_AMD64
124 unsigned iPDSrc = 0; /* initialized to shut up gcc */
125 PX86PML4E pPml4eSrc;
126 X86PDPE PdpeSrc;
127 PGSTPD pPDSrc;
128
129 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
130 Assert(pPml4eSrc);
131# endif
132
133 /* Quick check for a valid guest trap. (PAE & AMD64) */
134 if (!pPDSrc)
135 {
136# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
137 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# else
139 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
140# endif
141 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
142 TRPMSetErrorCode(pVCpu, uErr);
143 return VINF_EM_RAW_GUEST_TRAP;
144 }
145# endif
146
147# else /* !PGM_WITH_PAGING */
148 PGSTPD pPDSrc = NULL;
149 const unsigned iPDSrc = 0;
150# endif /* !PGM_WITH_PAGING */
151
152 /* Fetch the guest PDE */
153# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
154 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
155# else
156 GSTPDE PdeSrc;
157 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
158 PdeSrc.n.u1Present = 1;
159 PdeSrc.n.u1Write = 1;
160 PdeSrc.n.u1Accessed = 1;
161 PdeSrc.n.u1User = 1;
162# endif
163
164# if PGM_SHW_TYPE == PGM_TYPE_32BIT
165 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
166 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
167
168# elif PGM_SHW_TYPE == PGM_TYPE_PAE
169 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
170
171 PX86PDPAE pPDDst;
172# if PGM_GST_TYPE != PGM_TYPE_PAE
173 X86PDPE PdpeSrc;
174
175 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
176 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
177# endif
178 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
179 if (rc != VINF_SUCCESS)
180 {
181 AssertRC(rc);
182 return rc;
183 }
184 Assert(pPDDst);
185
186# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
187 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
188 PX86PDPAE pPDDst;
189# if PGM_GST_TYPE == PGM_TYPE_PROT
190 /* AMD-V nested paging */
191 X86PML4E Pml4eSrc;
192 X86PDPE PdpeSrc;
193 PX86PML4E pPml4eSrc = &Pml4eSrc;
194
195 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
196 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
197 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
198# endif
199
200 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
201 if (rc != VINF_SUCCESS)
202 {
203 AssertRC(rc);
204 return rc;
205 }
206 Assert(pPDDst);
207
208# elif PGM_SHW_TYPE == PGM_TYPE_EPT
209 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
210 PEPTPD pPDDst;
211
212 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
213 if (rc != VINF_SUCCESS)
214 {
215 AssertRC(rc);
216 return rc;
217 }
218 Assert(pPDDst);
219# endif
220
221# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
222 /*
223 * If we successfully correct the write protection fault due to dirty bit
224 * tracking, or this page fault is a genuine one, then return immediately.
225 */
226 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
227 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
228 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
229 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
230 || rc == VINF_EM_RAW_GUEST_TRAP)
231 {
232 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
233 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
234 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
235 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
236 }
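 /* (Note, not part of the original source: VINF_PGM_HANDLED_DIRTY_BIT_FAULT means
    the accessed/dirty-bit emulation in CheckPageFault already fixed things up, so
    the guest is simply resumed; VINF_EM_RAW_GUEST_TRAP reflects the fault to the
    guest with the error code set above.) */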
237
238 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
239# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
240
241 /*
242 * A common case is the not-present error caused by lazy page table syncing.
243 *
244 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
245 * so we can safely assume that the shadow PT is present when calling SyncPage later.
246 *
247 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
248 * of mapping conflict and defer to SyncCR3 in R3.
249 * (Again, we do NOT support access handlers for non-present guest pages.)
250 *
251 */
252 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
253 && !pPDDst->a[iPDDst].n.u1Present
254 && PdeSrc.n.u1Present
255 )
256 {
257 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
258 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
259 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
260 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
261 if (RT_SUCCESS(rc))
262 {
263 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
264 return rc;
265 }
266 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
267 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
268 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
269 return VINF_PGM_SYNC_CR3;
270 }
271
272# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
273 /*
274 * Check if this address is within any of our mappings.
275 *
276 * This is *very* fast and it's gonna save us a bit of effort below and prevent
277 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
278 * (BTW, it's impossible to have physical access handlers in a mapping.)
279 */
280 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
281 {
282 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
283 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
284 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
285 {
286 if (pvFault < pMapping->GCPtr)
287 break;
288 if (pvFault - pMapping->GCPtr < pMapping->cb)
289 {
290 /*
291 * The first thing we check is if we've got an undetected conflict.
292 */
293 if (!pVM->pgm.s.fMappingsFixed)
294 {
295 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
296 while (iPT-- > 0)
297 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
298 {
299 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
300 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
301 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
302 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
303 return VINF_PGM_SYNC_CR3;
304 }
305 }
306
307 /*
308 * Check if the fault address is in a virtual page access handler range.
309 */
310 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
311 if ( pCur
312 && pvFault - pCur->Core.Key < pCur->cb
313 && uErr & X86_TRAP_PF_RW)
314 {
315# ifdef IN_RC
316 STAM_PROFILE_START(&pCur->Stat, h);
317 pgmUnlock(pVM);
318 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
319 pgmLock(pVM);
320 STAM_PROFILE_STOP(&pCur->Stat, h);
321# else
322 AssertFailed();
323 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
324# endif
325 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
326 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
327 return rc;
328 }
329
330 /*
331 * Pretend we're not here and let the guest handle the trap.
332 */
333 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
334 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
335 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
336 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
337 return VINF_EM_RAW_GUEST_TRAP;
338 }
339 }
340 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
341 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
342# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
343
344 /*
345 * Check if this fault address is flagged for special treatment,
346 * which means we'll have to figure out the physical address and
347 * check flags associated with it.
348 *
349 * ASSUME that we can limit any special access handling to pages
350 * in page tables which the guest believes to be present.
351 */
352 if (PdeSrc.n.u1Present)
353 {
354 RTGCPHYS GCPhys = NIL_RTGCPHYS;
355
356# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
357# if PGM_GST_TYPE == PGM_TYPE_AMD64
358 bool fBigPagesSupported = true;
359# else
360 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
361# endif
362 if ( PdeSrc.b.u1Size
363 && fBigPagesSupported)
364 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
365 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
366 else
367 {
368 PGSTPT pPTSrc;
369 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
370 if (RT_SUCCESS(rc))
371 {
372 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
373 if (pPTSrc->a[iPTESrc].n.u1Present)
374 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
375 }
376 }
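 /* (Worked example, not part of the original source: for a 32-bit guest with a
    4MB page, GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK is 0x003FF000, so GCPhys
    becomes the 4MB frame from the PDE plus the 4KB-aligned offset of pvFault
    within that big page; for normal pages the 4KB frame is taken from the guest
    PTE instead.) */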
377# else
378 /* No paging so the fault address is the physical address */
379 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
380# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
381
382 /*
383 * If we have a GC address we'll check if it has any flags set.
384 */
385 if (GCPhys != NIL_RTGCPHYS)
386 {
387 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
388
389 PPGMPAGE pPage;
390 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
391 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
392 {
393 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
394 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
395 {
396 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
397 {
398 /*
399 * Physical page access handler.
400 */
401 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
402 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
403 if (pCur)
404 {
405# ifdef PGM_SYNC_N_PAGES
406 /*
407 * If the region is write protected and we got a page not present fault, then sync
408 * the pages. If the fault was caused by a read, then restart the instruction.
409 * In case of write access continue to the GC write handler.
410 *
411 * ASSUMES that there is only one handler per page or that they have similar write properties.
412 */
413 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
414 && !(uErr & X86_TRAP_PF_P))
415 {
416 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
417 if ( RT_FAILURE(rc)
418 || !(uErr & X86_TRAP_PF_RW)
419 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
420 {
421 AssertRC(rc);
422 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
423 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
424 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
425 return rc;
426 }
427 }
428# endif
429
430 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
431 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
432 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
433
434# if defined(IN_RC) || defined(IN_RING0)
435 if (pCur->CTX_SUFF(pfnHandler))
436 {
437 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
438# ifdef IN_RING0
439 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
440# else
441 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
442# endif
443 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
444 void *pvUser = pCur->CTX_SUFF(pvUser);
445
446 STAM_PROFILE_START(&pCur->Stat, h);
447 if (fLeaveLock)
448 pgmUnlock(pVM); /* @todo: Not entirely safe. */
449
450 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
451 if (fLeaveLock)
452 pgmLock(pVM);
453# ifdef VBOX_WITH_STATISTICS
454 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
455 if (pCur)
456 STAM_PROFILE_STOP(&pCur->Stat, h);
457# else
458 pCur = NULL; /* might be invalid by now. */
459# endif
460
461 }
462 else
463# endif
464 rc = VINF_EM_RAW_EMULATE_INSTR;
465
466 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
467 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
468 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
469 return rc;
470 }
471 }
472# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
473 else
474 {
475# ifdef PGM_SYNC_N_PAGES
476 /*
477 * If the region is write protected and we got a page not present fault, then sync
478 * the pages. If the fault was caused by a read, then restart the instruction.
479 * In case of write access continue to the GC write handler.
480 */
481 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
482 && !(uErr & X86_TRAP_PF_P))
483 {
484 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
485 if ( RT_FAILURE(rc)
486 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
487 || !(uErr & X86_TRAP_PF_RW))
488 {
489 AssertRC(rc);
490 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
491 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
492 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
493 return rc;
494 }
495 }
496# endif
497 /*
498 * Ok, it's a virtual page access handler.
499 *
500 * Since it's faster to search by address, we'll do that first
501 * and then retry by GCPhys if that fails.
502 */
503 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
504 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
505 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
506 */
507 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
508 if (pCur)
509 {
510 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
511 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
512 || !(uErr & X86_TRAP_PF_P)
513 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
514 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
515
516 if ( pvFault - pCur->Core.Key < pCur->cb
517 && ( uErr & X86_TRAP_PF_RW
518 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
519 {
520# ifdef IN_RC
521 STAM_PROFILE_START(&pCur->Stat, h);
522 pgmUnlock(pVM);
523 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
524 pgmLock(pVM);
525 STAM_PROFILE_STOP(&pCur->Stat, h);
526# else
527 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
528# endif
529 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
530 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
531 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
532 return rc;
533 }
534 /* Unhandled part of a monitored page */
535 }
536 else
537 {
538 /* Check by physical address. */
539 PPGMVIRTHANDLER pCur;
540 unsigned iPage;
541 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
542 &pCur, &iPage);
543 Assert(RT_SUCCESS(rc) || !pCur);
544 if ( pCur
545 && ( uErr & X86_TRAP_PF_RW
546 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
547 {
548 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
549# ifdef IN_RC
550 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
551 Assert(off < pCur->cb);
552 STAM_PROFILE_START(&pCur->Stat, h);
553 pgmUnlock(pVM);
554 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
555 pgmLock(pVM);
556 STAM_PROFILE_STOP(&pCur->Stat, h);
557# else
558 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
559# endif
560 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
561 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
562 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
563 return rc;
564 }
565 }
566 }
567# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
568
569 /*
570 * There is a handled area of the page, but this fault doesn't belong to it.
571 * We must emulate the instruction.
572 *
573 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
574 * we first check whether this was a page-not-present fault for a page with only
575 * write access handlers. Restart the instruction if it wasn't a write access.
576 */
577 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
578
579 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
580 && !(uErr & X86_TRAP_PF_P))
581 {
582 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
583 if ( RT_FAILURE(rc)
584 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
585 || !(uErr & X86_TRAP_PF_RW))
586 {
587 AssertRC(rc);
588 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
589 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
590 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
591 return rc;
592 }
593 }
594
595 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
596 * It's writing to an unhandled part of the LDT page several million times.
597 */
598 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
599 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
600 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
601 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
602 return rc;
603 } /* if any kind of handler */
604
605# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
606 if (uErr & X86_TRAP_PF_P)
607 {
608 /*
609 * The page isn't marked, but it might still be monitored by a virtual page access handler.
610 * (ASSUMES no temporary disabling of virtual handlers.)
611 */
612 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
613 * we should correct both the shadow page table and physical memory flags, and not only check for
614 * accesses within the handler region but for access to pages with virtual handlers. */
615 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
616 if (pCur)
617 {
618 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
619 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
620 || !(uErr & X86_TRAP_PF_P)
621 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
622 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
623
624 if ( pvFault - pCur->Core.Key < pCur->cb
625 && ( uErr & X86_TRAP_PF_RW
626 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
627 {
628# ifdef IN_RC
629 STAM_PROFILE_START(&pCur->Stat, h);
630 pgmUnlock(pVM);
631 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
632 pgmLock(pVM);
633 STAM_PROFILE_STOP(&pCur->Stat, h);
634# else
635 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
636# endif
637 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
638 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
639 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
640 return rc;
641 }
642 }
643 }
644# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
645 }
646 else
647 {
648 /*
649 * When the guest accesses invalid physical memory (e.g. probing
650 * of RAM or accessing a remapped MMIO range), then we'll fall
651 * back to the recompiler to emulate the instruction.
652 */
653 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
654 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
655 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
656 return VINF_EM_RAW_EMULATE_INSTR;
657 }
658
659 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
660
661# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
662 /*
663 * We are here only if the page is present in the guest page tables and
664 * the trap is not handled by our handlers.
665 *
666 * Check it for page out-of-sync situation.
667 */
668 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
669
670 if (!(uErr & X86_TRAP_PF_P))
671 {
672 /*
673 * Page is not present in our page tables.
674 * Try to sync it!
675 * BTW, fPageShw is invalid in this branch!
676 */
677 if (uErr & X86_TRAP_PF_US)
678 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
679 else /* supervisor */
680 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
681
682# if defined(LOG_ENABLED) && !defined(IN_RING0)
683 RTGCPHYS GCPhys;
684 uint64_t fPageGst;
685 PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
686 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
687 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
688# endif /* LOG_ENABLED */
689
690# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
691 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
692 {
693 uint64_t fPageGst;
694 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
695 if ( RT_SUCCESS(rc)
696 && !(fPageGst & X86_PTE_US))
697 {
698 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
699 if ( pvFault == (RTGCPTR)pRegFrame->eip
700 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
701# ifdef CSAM_DETECT_NEW_CODE_PAGES
702 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
703 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
704# endif /* CSAM_DETECT_NEW_CODE_PAGES */
705 )
706 {
707 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
708 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
709 if (rc != VINF_SUCCESS)
710 {
711 /*
712 * CSAM needs to perform a job in ring 3.
713 *
714 * Sync the page before going to the host context; otherwise we'll end up in a loop if
715 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
716 */
717 LogFlow(("CSAM ring 3 job\n"));
718 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
719 AssertRC(rc2);
720
721 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
722 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
723 return rc;
724 }
725 }
726# ifdef CSAM_DETECT_NEW_CODE_PAGES
727 else if ( uErr == X86_TRAP_PF_RW
728 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
729 && pRegFrame->ecx < 0x10000)
730 {
731 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
732 * to detect loading of new code pages.
733 */
734
735 /*
736 * Decode the instruction.
737 */
738 RTGCPTR PC;
739 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
740 if (rc == VINF_SUCCESS)
741 {
742 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
743 uint32_t cbOp;
744 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
745
746 /* For now we'll restrict this to rep movsw/d instructions */
747 if ( rc == VINF_SUCCESS
748 && pDis->pCurInstr->opcode == OP_MOVSWD
749 && (pDis->prefix & PREFIX_REP))
750 {
751 CSAMMarkPossibleCodePage(pVM, pvFault);
752 }
753 }
754 }
755# endif /* CSAM_DETECT_NEW_CODE_PAGES */
756
757 /*
758 * Mark this page as safe.
759 */
760 /** @todo not correct for pages that contain both code and data!! */
761 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
762 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
763 }
764 }
765# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
766 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
767 if (RT_SUCCESS(rc))
768 {
769 /* The page was successfully synced, return to the guest. */
770 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
771 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
772 return VINF_SUCCESS;
773 }
774 }
775 else /* uErr & X86_TRAP_PF_P: */
776 {
777 /*
778 * Write protected pages are made writable when the guest makes the first
779 * write to them. This happens for pages that are shared, write monitored
780 * and not yet allocated.
781 *
782 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
783 * to physically monitored regions that are no longer valid.
784 * Assume for now it only applies to the read/write flag.
785 */
786 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
787 {
788 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
789 {
790 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
791 GCPhys, pPage, pvFault, uErr));
792 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
793 if (rc != VINF_SUCCESS)
794 {
795 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
796 return rc;
797 }
798 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
799 return VINF_EM_NO_MEMORY;
800 }
801
802 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
803 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
804 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
805 {
806 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
807 uint64_t fPageGst;
808 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
809 if ( RT_SUCCESS(rc)
810 && !(fPageGst & X86_PTE_RW))
811 {
812 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
813 if (RT_SUCCESS(rc))
814 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
815 else
816 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
817 return rc;
818 }
819 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
820 }
821
822 /// @todo count the above case; else
823 if (uErr & X86_TRAP_PF_US)
824 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
825 else /* supervisor */
826 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
827
828 /*
829 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
830 * page is not present, which is not true in this case.
831 */
832 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
833 if (RT_SUCCESS(rc))
834 {
835 /*
836 * Page was successfully synced, return to guest.
837 */
838# ifdef VBOX_STRICT
839 RTGCPHYS GCPhys;
840 uint64_t fPageGst;
841 if (!HWACCMIsNestedPagingActive(pVM))
842 {
843 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
844 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
845 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
846 }
847 uint64_t fPageShw;
848 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
849 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCPUs > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
850# endif /* VBOX_STRICT */
851 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
852 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
853 return VINF_SUCCESS;
854 }
855 }
856
857# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
858# ifdef VBOX_STRICT
859 /*
860 * Check for VMM page flags vs. Guest page flags consistency.
861 * Currently only for debug purposes.
862 */
863 if (RT_SUCCESS(rc))
864 {
865 /* Get guest page flags. */
866 uint64_t fPageGst;
867 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
868 if (RT_SUCCESS(rc))
869 {
870 uint64_t fPageShw;
871 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
872
873 /*
874 * Compare page flags.
875 * Note: we have AVL, A, D bits desynched.
876 */
877 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
878 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
879 }
880 else
881 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
882 }
883 else
884 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
885# endif /* VBOX_STRICT */
886# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
887 }
888 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
889# endif /* PGM_OUT_OF_SYNC_IN_GC */
890 }
891 else /* GCPhys == NIL_RTGCPHYS */
892 {
893 /*
894 * Page not present in Guest OS or invalid page table address.
895 * This is potential virtual page access handler food.
896 *
897 * For the present we'll say that our access handlers don't
898 * work for this case - we've already discarded the page table
899 * not present case which is identical to this.
900 *
901 * When we perchance find we need this, we will probably have AVL
902 * trees (offset based) to operate on and we can measure their speed
903 * against mapping a page table and probably rearrange this handling
904 * a bit. (Like, searching virtual ranges before checking the
905 * physical address.)
906 */
907 }
908 }
909 /* else: !present (guest) */
910
911
912# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
913 /*
914 * Conclusion, this is a guest trap.
915 */
916 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
917 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
918 return VINF_EM_RAW_GUEST_TRAP;
919# else
920 /* present, but not a monitored page; perhaps the guest is probing physical memory */
921 return VINF_EM_RAW_EMULATE_INSTR;
922# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
923
924
925# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
926
927 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
928 return VERR_INTERNAL_ERROR;
929# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
930}
931#endif /* !IN_RING3 */
932
933
934/**
935 * Emulation of the invlpg instruction.
936 *
937 *
938 * @returns VBox status code.
939 *
940 * @param pVCpu The VMCPU handle.
941 * @param GCPtrPage Page to invalidate.
942 *
943 * @remark ASSUMES that the guest is updating before invalidating. This order
944 * isn't required by the CPU, so this is speculative and could cause
945 * trouble.
946 * @remark No TLB shootdown is done on any other VCPU as we assume that
947 * invlpg emulation is the *only* reason for calling this function.
948 * (The guest has to shoot down TLB entries on other CPUs itself)
949 * Currently true, but keep in mind!
950 *
951 * @todo Flush page or page directory only if necessary!
952 * @todo Add a #define for simply invalidating the page.
953 */
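/* (Editor's summary, not part of the original source, based on the code below:
 * the shadow PDE for GCPtrPage (and, for AMD64, the shadow PML4E/PDPTE) is
 * compared against the corresponding guest entry. If the guest entry is absent,
 * grants fewer rights than the shadow, or has lost its accessed bit, the shadow
 * table is freed back to the pool and the entry cleared so it gets resynced on
 * next use. Otherwise only the single 4KB page is resynced, or the whole 2/4MB
 * mapping is flushed if it changed.) */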
954PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
955{
956#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
957 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
958 && PGM_SHW_TYPE != PGM_TYPE_EPT
959 int rc;
960 PVM pVM = pVCpu->CTX_SUFF(pVM);
961 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
962
963 Assert(PGMIsLockOwner(pVM));
964
965 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
966 /*
967 * Get the shadow PD entry and skip out if this PD isn't present.
968 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
969 */
970# if PGM_SHW_TYPE == PGM_TYPE_32BIT
971 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
972 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
973
974 /* Fetch the pgm pool shadow descriptor. */
975 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
976 Assert(pShwPde);
977
978# elif PGM_SHW_TYPE == PGM_TYPE_PAE
979 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
980 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
981
982 /* If the shadow PDPE isn't present, then skip the invalidate. */
983 if (!pPdptDst->a[iPdpt].n.u1Present)
984 {
985 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
986 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
987 return VINF_SUCCESS;
988 }
989
990 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
991 PPGMPOOLPAGE pShwPde = NULL;
992 PX86PDPAE pPDDst;
993
994 /* Fetch the pgm pool shadow descriptor. */
995 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
996 AssertRCSuccessReturn(rc, rc);
997 Assert(pShwPde);
998
999 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1000 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1001
1002# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1003 /* PML4 */
1004 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1005 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1006 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1007 PX86PDPAE pPDDst;
1008 PX86PDPT pPdptDst;
1009 PX86PML4E pPml4eDst;
1010 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1011 if (rc != VINF_SUCCESS)
1012 {
1013 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1014 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1015 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
1016 PGM_INVL_VCPU_TLBS(pVCpu);
1017 return VINF_SUCCESS;
1018 }
1019 Assert(pPDDst);
1020
1021 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1022 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1023
1024 if (!pPdpeDst->n.u1Present)
1025 {
1026 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1027 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
1028 PGM_INVL_VCPU_TLBS(pVCpu);
1029 return VINF_SUCCESS;
1030 }
1031
1032# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1033
1034 const SHWPDE PdeDst = *pPdeDst;
1035 if (!PdeDst.n.u1Present)
1036 {
1037 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1038 return VINF_SUCCESS;
1039 }
1040
1041# if defined(IN_RC)
1042 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1043 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1044# endif
1045
1046 /*
1047 * Get the guest PD entry and calc big page.
1048 */
1049# if PGM_GST_TYPE == PGM_TYPE_32BIT
1050 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1051 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1052 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1053# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1054 unsigned iPDSrc = 0;
1055# if PGM_GST_TYPE == PGM_TYPE_PAE
1056 X86PDPE PdpeSrc;
1057 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1058# else /* AMD64 */
1059 PX86PML4E pPml4eSrc;
1060 X86PDPE PdpeSrc;
1061 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1062# endif
1063 GSTPDE PdeSrc;
1064
1065 if (pPDSrc)
1066 PdeSrc = pPDSrc->a[iPDSrc];
1067 else
1068 PdeSrc.u = 0;
1069# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1070
1071# if PGM_GST_TYPE == PGM_TYPE_AMD64
1072 const bool fIsBigPage = PdeSrc.b.u1Size;
1073# else
1074 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1075# endif
1076
1077# ifdef IN_RING3
1078 /*
1079 * If a CR3 Sync is pending we may ignore the invalidate page operation
1080 * depending on the kind of sync and if it's a global page or not.
1081 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1082 */
1083# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1084 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1085 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1086 && fIsBigPage
1087 && PdeSrc.b.u1Global
1088 )
1089 )
1090# else
1091 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1092# endif
1093 {
1094 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1095 return VINF_SUCCESS;
1096 }
1097# endif /* IN_RING3 */
1098
1099# if PGM_GST_TYPE == PGM_TYPE_AMD64
1100 /* Fetch the pgm pool shadow descriptor. */
1101 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
1102 Assert(pShwPdpt);
1103
1104 /* Fetch the pgm pool shadow descriptor. */
1105 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1106 Assert(pShwPde);
1107
1108 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1109 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1110
1111 if ( !pPml4eSrc->n.u1Present
1112 || pShwPdpt->GCPhys != GCPhysPdpt)
1113 {
1114 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1115 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1116 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1117 ASMAtomicWriteSize(pPml4eDst, 0);
1118 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1119 PGM_INVL_VCPU_TLBS(pVCpu);
1120 return VINF_SUCCESS;
1121 }
1122 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1123 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1124 {
1125 /*
1126 * Mark not present so we can resync the PML4E when it's used.
1127 */
1128 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1129 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1130 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1131 ASMAtomicWriteSize(pPml4eDst, 0);
1132 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1133 PGM_INVL_VCPU_TLBS(pVCpu);
1134 }
1135 else if (!pPml4eSrc->n.u1Accessed)
1136 {
1137 /*
1138 * Mark not present so we can set the accessed bit.
1139 */
1140 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1141 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1142 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1143 ASMAtomicWriteSize(pPml4eDst, 0);
1144 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1145 PGM_INVL_VCPU_TLBS(pVCpu);
1146 }
1147
1148 /* Check if the PDPT entry has changed. */
1149 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1150 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1151 if ( !PdpeSrc.n.u1Present
1152 || pShwPde->GCPhys != GCPhysPd)
1153 {
1154 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1155 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1156 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1157 ASMAtomicWriteSize(pPdpeDst, 0);
1158 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1159 PGM_INVL_VCPU_TLBS(pVCpu);
1160 return VINF_SUCCESS;
1161 }
1162 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1163 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1164 {
1165 /*
1166 * Mark not present so we can resync the PDPTE when it's used.
1167 */
1168 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1169 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1170 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1171 ASMAtomicWriteSize(pPdpeDst, 0);
1172 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1173 PGM_INVL_VCPU_TLBS(pVCpu);
1174 }
1175 else if (!PdpeSrc.lm.u1Accessed)
1176 {
1177 /*
1178 * Mark not present so we can set the accessed bit.
1179 */
1180 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1181 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1182 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1183 ASMAtomicWriteSize(pPdpeDst, 0);
1184 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1185 PGM_INVL_VCPU_TLBS(pVCpu);
1186 }
1187# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1188
1189 /*
1190 * Deal with the Guest PDE.
1191 */
1192 rc = VINF_SUCCESS;
1193 if (PdeSrc.n.u1Present)
1194 {
1195# ifndef PGM_WITHOUT_MAPPING
1196 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1197 {
1198 /*
1199 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1200 */
1201 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1202 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1203 pgmLock(pVM);
1204 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1205 pgmUnlock(pVM);
1206 }
1207 else
1208# endif /* !PGM_WITHOUT_MAPPING */
1209 if ( PdeSrc.n.u1User != PdeDst.n.u1User
1210 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1211 {
1212 /*
1213 * Mark not present so we can resync the PDE when it's used.
1214 */
1215 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1216 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1217 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1218 ASMAtomicWriteSize(pPdeDst, 0);
1219 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1220 PGM_INVL_VCPU_TLBS(pVCpu);
1221 }
1222 else if (!PdeSrc.n.u1Accessed)
1223 {
1224 /*
1225 * Mark not present so we can set the accessed bit.
1226 */
1227 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1228 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1229 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1230 ASMAtomicWriteSize(pPdeDst, 0);
1231 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1232 PGM_INVL_VCPU_TLBS(pVCpu);
1233 }
1234 else if (!fIsBigPage)
1235 {
1236 /*
1237 * 4KB - page.
1238 */
1239 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1240 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1241# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1242 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1243 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1244# endif
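 /* (Note, not part of the original source: a 32-bit guest page table covers 4MB
    with 1024 entries, while a PAE shadow page table covers only 2MB with 512
    entries, so each guest PT is shadowed by two PAE PTs; the
    (iPDDst & 1) * (PAGE_SIZE/2) adjustment records which half of the guest PT
    this shadow page corresponds to.) */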
1245 if (pShwPage->GCPhys == GCPhys)
1246 {
1247# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1248 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1249 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1250 if (pPT->a[iPTEDst].n.u1Present)
1251 {
1252# ifdef PGMPOOL_WITH_USER_TRACKING
1253 /* This is very unlikely with caching/monitoring enabled. */
1254 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1255# endif
1256 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1257 }
1258# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1259 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1260 if (RT_SUCCESS(rc))
1261 rc = VINF_SUCCESS;
1262# endif
1263 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1264 PGM_INVL_PG(pVCpu, GCPtrPage);
1265 }
1266 else
1267 {
1268 /*
1269 * The page table address changed.
1270 */
1271 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1272 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1273 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1274 ASMAtomicWriteSize(pPdeDst, 0);
1275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1276 PGM_INVL_VCPU_TLBS(pVCpu);
1277 }
1278 }
1279 else
1280 {
1281 /*
1282 * 2/4MB - page.
1283 */
1284 /* Before freeing the page, check if anything really changed. */
1285 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1286 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1287# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1288 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1289 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1290# endif
1291 if ( pShwPage->GCPhys == GCPhys
1292 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1293 {
1294 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1295 /** @todo PAT */
1296 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1297 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1298 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1299 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1300 {
1301 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1302 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1303# if defined(IN_RC)
1304 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1305 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1306# endif
1307 return VINF_SUCCESS;
1308 }
1309 }
1310
1311 /*
1312 * Ok, the page table is present and it's been changed in the guest.
1313 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1314 * We could do this for some flushes in GC too, but we need an algorithm for
1315 * deciding which 4MB pages contain code likely to be executed very soon.
1316 */
1317 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1318 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1319 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1320 ASMAtomicWriteSize(pPdeDst, 0);
1321 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1322 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1323 }
1324 }
1325 else
1326 {
1327 /*
1328 * Page directory is not present, mark shadow PDE not present.
1329 */
1330 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1331 {
1332 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1333 ASMAtomicWriteSize(pPdeDst, 0);
1334 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1335 PGM_INVL_PG(pVCpu, GCPtrPage);
1336 }
1337 else
1338 {
1339 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1340 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1341 }
1342 }
1343# if defined(IN_RC)
1344 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1345 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1346# endif
1347 return rc;
1348
1349#else /* guest real and protected mode */
1350 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1351 return VINF_SUCCESS;
1352#endif
1353}
1354
1355
1356#ifdef PGMPOOL_WITH_USER_TRACKING
1357/**
1358 * Update the tracking of shadowed pages.
1359 *
1360 * @param pVCpu The VMCPU handle.
1361 * @param pShwPage The shadow page.
1362 * @param HCPhys The physical page which is being dereferenced.
1363 */
1364DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1365{
1366# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1367 PVM pVM = pVCpu->CTX_SUFF(pVM);
1368
1369 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1370 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1371
1372 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1373 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1374 * 2. write protect all shadowed pages. I.e. implement caching.
1375 */
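 /* (Illustrative sketch only, not part of VirtualBox -- the names below are
    invented: the HCPhys -> GCPhys TLB suggested above could be a small
    direct-mapped table, e.g.
        unsigned iSlot = (HCPhys >> PAGE_SHIFT) % RT_ELEMENTS(s_aDerefTlb);
        if (s_aDerefTlb[iSlot].HCPhys == HCPhys)
            pPage = s_aDerefTlb[iSlot].pPage;   // hit: skip the range walk below
    falling back to the linear RAM-range scan below on a miss.) */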
1376 /*
1377 * Find the guest address.
1378 */
1379 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1380 pRam;
1381 pRam = pRam->CTX_SUFF(pNext))
1382 {
1383 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1384 while (iPage-- > 0)
1385 {
1386 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1387 {
1388 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1389 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1390 pShwPage->cPresent--;
1391 pPool->cPresent--;
1392 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1393 return;
1394 }
1395 }
1396 }
1397
1398 for (;;)
1399 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1400# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1401 pShwPage->cPresent--;
1402 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1403# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1404}
1405
1406
1407/**
1408 * Update the tracking of shadowed pages.
1409 *
1410 * @param pVCpu The VMCPU handle.
1411 * @param pShwPage The shadow page.
1412 * @param u16 The top 16-bit of the pPage->HCPhys.
1413 * @param pPage Pointer to the guest page. This will be modified.
1414 * @param iPTDst The index into the shadow table.
1415 */
1416DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1417{
1418 PVM pVM = pVCpu->CTX_SUFF(pVM);
1419# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1420 /*
1421 * Just deal with the simple first time here.
1422 */
1423 if (!u16)
1424 {
1425 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1426 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1427 }
1428 else
1429 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1430
1431 /* write back */
1432 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1433 PGM_PAGE_SET_TRACKING(pPage, u16);
1434
1435# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1436
1437 /* update statistics. */
1438 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1439 pShwPage->cPresent++;
1440 if (pShwPage->iFirstPresent > iPTDst)
1441 pShwPage->iFirstPresent = iPTDst;
1442}
1443#endif /* PGMPOOL_WITH_USER_TRACKING */
1444
1445
1446/**
1447 * Creates a 4K shadow page for a guest page.
1448 *
1449 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1450 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1451 * will be mapped in this function.
1452 *
1453 * @param pVCpu The VMCPU handle.
1454 * @param pPteDst Destination page table entry.
1455 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1456 * Can safely assume that only the flags are being used.
1457 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1458 * @param pShwPage Pointer to the shadow page.
1459 * @param iPTDst The index into the shadow table.
1460 *
1461 * @remark Not used for 2/4MB pages!
1462 */
1463DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1464{
1465 if (PteSrc.n.u1Present)
1466 {
1467 PVM pVM = pVCpu->CTX_SUFF(pVM);
1468
1469 /*
1470 * Find the ram range.
1471 */
1472 PPGMPAGE pPage;
1473 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1474 if (RT_SUCCESS(rc))
1475 {
1476#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1477 /* Try make the page writable if necessary. */
1478 if ( PteSrc.n.u1Write
1479 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1480 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1481 {
1482 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1483 AssertRC(rc);
1484 }
1485#endif
1486
1487 /** @todo investigate PWT, PCD and PAT. */
1488 /*
1489 * Make page table entry.
1490 */
1491 SHWPTE PteDst;
1492 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1493 {
1494 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1495 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1496 {
1497#if PGM_SHW_TYPE == PGM_TYPE_EPT
1498 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1499 PteDst.n.u1Present = 1;
1500 PteDst.n.u1Execute = 1;
1501 PteDst.n.u1IgnorePAT = 1;
1502 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1503 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1504#else
1505 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1506 | PGM_PAGE_GET_HCPHYS(pPage);
1507#endif
1508 }
1509 else
1510 {
1511 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1512 PteDst.u = 0;
1513 }
1514 /** @todo count these two kinds. */
1515 }
1516 else
1517 {
1518#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1519 /*
1520 * If the page or page directory entry is not marked accessed,
1521 * we mark the page not present.
1522 */
1523 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1524 {
1525 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1526 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1527 PteDst.u = 0;
1528 }
1529 else
1530 /*
1531 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1532 * when the page is modified.
1533 */
1534 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1535 {
1536 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1537 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1538 | PGM_PAGE_GET_HCPHYS(pPage)
1539 | PGM_PTFLAGS_TRACK_DIRTY;
1540 }
1541 else
1542#endif
1543 {
1544 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1545#if PGM_SHW_TYPE == PGM_TYPE_EPT
1546 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1547 PteDst.n.u1Present = 1;
1548 PteDst.n.u1Write = 1;
1549 PteDst.n.u1Execute = 1;
1550 PteDst.n.u1IgnorePAT = 1;
1551 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1552 /* PteDst.n.u1Size = 0 */
1553#else
1554 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1555 | PGM_PAGE_GET_HCPHYS(pPage);
1556#endif
1557 }
1558 }
1559
1560 /*
1561 * Make sure only allocated pages are mapped writable.
1562 */
1563 if ( PteDst.n.u1Write
1564 && PteDst.n.u1Present
1565 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1566 {
1567 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1568 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1569 }
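 /* (Pages not in the ALLOCATED state - e.g. the shared zero page or write
  * monitored pages - must not be mapped writable, so that the first write
  * faults and gives PGM a chance to allocate or copy the page first.) */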
1570
1571#ifdef PGMPOOL_WITH_USER_TRACKING
1572 /*
1573 * Keep user track up to date.
1574 */
1575 if (PteDst.n.u1Present)
1576 {
1577 if (!pPteDst->n.u1Present)
1578 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1579 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1580 {
1581 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1582 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1583 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1584 }
1585 }
1586 else if (pPteDst->n.u1Present)
1587 {
1588 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1589 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1590 }
1591#endif /* PGMPOOL_WITH_USER_TRACKING */
1592
1593 /*
1594 * Update statistics and commit the entry.
1595 */
1596#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1597 if (!PteSrc.n.u1Global)
1598 pShwPage->fSeenNonGlobal = true;
1599#endif
1600 ASMAtomicWriteSize(pPteDst, PteDst.u);
1601 }
1602 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1603 /** @todo count these. */
1604 }
1605 else
1606 {
1607 /*
1608 * Page not-present.
1609 */
1610 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1611#ifdef PGMPOOL_WITH_USER_TRACKING
1612 /* Keep user track up to date. */
1613 if (pPteDst->n.u1Present)
1614 {
1615 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1616 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1617 }
1618#endif /* PGMPOOL_WITH_USER_TRACKING */
1619 ASMAtomicWriteSize(pPteDst, 0);
1620 /** @todo count these. */
1621 }
1622}
1623
1624
1625/**
1626 * Syncs a guest OS page.
1627 *
1628 * There are no conflicts at this point, neither is there any need for
1629 * page table allocations.
1630 *
1631 * @returns VBox status code.
1632 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1633 * @param pVCpu The VMCPU handle.
1634 * @param PdeSrc Page directory entry of the guest.
1635 * @param GCPtrPage Guest context page address.
1636 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1637 * @param uErr Fault error (X86_TRAP_PF_*).
1638 */
1639PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1640{
1641 PVM pVM = pVCpu->CTX_SUFF(pVM);
1642 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1643 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1644
1645 Assert(PGMIsLockOwner(pVM));
1646
1647#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1648 || PGM_GST_TYPE == PGM_TYPE_PAE \
1649 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1650 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1651 && PGM_SHW_TYPE != PGM_TYPE_EPT
1652
1653# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1654 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
1655# endif
1656
1657 /*
1658 * Assert preconditions.
1659 */
1660 Assert(PdeSrc.n.u1Present);
1661 Assert(cPages);
1662 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1663
1664 /*
1665 * Get the shadow PDE, find the shadow page table in the pool.
1666 */
1667# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1668 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1669 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1670
1671 /* Fetch the pgm pool shadow descriptor. */
1672 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1673 Assert(pShwPde);
1674
1675# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1676 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1677 PPGMPOOLPAGE pShwPde = NULL;
1678 PX86PDPAE pPDDst;
1679
1680 /* Fetch the pgm pool shadow descriptor. */
1681 int rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1682 AssertRCSuccessReturn(rc, rc);
1683 Assert(pShwPde);
1684
1685 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1686 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1687
1688# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1689 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1690 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1691 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1692 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1693
1694 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1695 AssertRCSuccessReturn(rc, rc);
1696 Assert(pPDDst && pPdptDst);
1697 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1698# endif
1699 SHWPDE PdeDst = *pPdeDst;
1700 if (!PdeDst.n.u1Present)
1701 {
1702 AssertMsg(pVM->cCPUs > 1, ("Unexpected missing PDE %p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1703 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1704 return VINF_SUCCESS; /* force the instruction to be executed again. */
1705 }
1706
1707 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1708
1709# if PGM_GST_TYPE == PGM_TYPE_AMD64
1710 /* Fetch the pgm pool shadow descriptor. */
1711 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1712 Assert(pShwPde);
1713# endif
1714
1715# if defined(IN_RC)
1716 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1717 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1718# endif
1719
1720 /*
1721 * Check that the page is present and that the shadow PDE isn't out of sync.
1722 */
1723# if PGM_GST_TYPE == PGM_TYPE_AMD64
1724 const bool fBigPage = PdeSrc.b.u1Size;
1725# else
1726 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1727# endif
1728 RTGCPHYS GCPhys;
1729 if (!fBigPage)
1730 {
1731 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1732# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1733 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1734 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1735# endif
1736 }
1737 else
1738 {
1739 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1740# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1741 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1742 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1743# endif
1744 }
1745 if ( pShwPage->GCPhys == GCPhys
1746 && PdeSrc.n.u1Present
1747 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1748 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1749# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1750 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1751# endif
1752 )
1753 {
1754 /*
1755 * Check that the PDE is marked accessed already.
1756 * Since we set the accessed bit *before* getting here on a #PF, this
1757 * check is only meant for dealing with non-#PF'ing paths.
1758 */
1759 if (PdeSrc.n.u1Accessed)
1760 {
1761 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1762 if (!fBigPage)
1763 {
1764 /*
1765 * 4KB Page - Map the guest page table.
1766 */
1767 PGSTPT pPTSrc;
1768 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1769 if (RT_SUCCESS(rc))
1770 {
1771# ifdef PGM_SYNC_N_PAGES
1772 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1773 if ( cPages > 1
1774 && !(uErr & X86_TRAP_PF_P)
1775 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1776 {
1777 /*
1778 * This code path is currently only taken when the caller is PGMTrap0eHandler
1779 * for non-present pages!
1780 *
1781 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1782 * deal with locality.
1783 */
1784 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1785# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1786 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1787 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1788# else
1789 const unsigned offPTSrc = 0;
1790# endif
1791 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1792 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1793 iPTDst = 0;
1794 else
1795 iPTDst -= PGM_SYNC_NR_PAGES / 2;
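 /* I.e. sync a window of PGM_SYNC_NR_PAGES entries centered on the faulting
  * entry, clamped to the bounds of the shadow page table. */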
1796 for (; iPTDst < iPTDstEnd; iPTDst++)
1797 {
1798 if (!pPTDst->a[iPTDst].n.u1Present)
1799 {
1800 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1801 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1802 NOREF(GCPtrCurPage);
1803#ifndef IN_RING0
1804 /*
1805 * Assuming kernel code will be marked as supervisor - and not as user level
1806 * and executed using a conforming code selector - and marked as read-only.
1807 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1808 */
1809 PPGMPAGE pPage;
1810 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1811 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1812 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1813 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1814 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1815 )
1816#endif /* else: CSAM not active */
1817 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1818 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1819 GCPtrCurPage, PteSrc.n.u1Present,
1820 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1821 PteSrc.n.u1User & PdeSrc.n.u1User,
1822 (uint64_t)PteSrc.u,
1823 (uint64_t)pPTDst->a[iPTDst].u,
1824 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1825 }
1826 }
1827 }
1828 else
1829# endif /* PGM_SYNC_N_PAGES */
1830 {
1831 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1832 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1833 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1834 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1835 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1836 GCPtrPage, PteSrc.n.u1Present,
1837 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1838 PteSrc.n.u1User & PdeSrc.n.u1User,
1839 (uint64_t)PteSrc.u,
1840 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1841 }
1842 }
1843 else /* MMIO or invalid page: emulated in #PF handler. */
1844 {
1845 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1846 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1847 }
1848 }
1849 else
1850 {
1851 /*
1852 * 4/2MB page - lazy syncing shadow 4K pages.
1853 * (There are many causes of getting here, it's no longer only CSAM.)
1854 */
1855 /* Calculate the GC physical address of this 4KB shadow page. */
1856 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1857 /* Find ram range. */
1858 PPGMPAGE pPage;
1859 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1860 if (RT_SUCCESS(rc))
1861 {
1862# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1863 /* Try make the page writable if necessary. */
1864 if ( PdeSrc.n.u1Write
1865 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1866 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1867 {
1868 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1869 AssertRC(rc);
1870 }
1871# endif
1872
1873 /*
1874 * Make shadow PTE entry.
1875 */
1876 SHWPTE PteDst;
1877 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1878 | PGM_PAGE_GET_HCPHYS(pPage);
1879 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1880 {
1881 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1882 PteDst.n.u1Write = 0;
1883 else
1884 PteDst.u = 0;
1885 }
1886 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1887# ifdef PGMPOOL_WITH_USER_TRACKING
1888 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1889 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1890# endif
1891 /* Make sure only allocated pages are mapped writable. */
1892 if ( PteDst.n.u1Write
1893 && PteDst.n.u1Present
1894 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1895 {
1896 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1897 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1898 }
1899
1900 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1901
1902 /*
1903 * If the page is not flagged as dirty and is writable, then make it read-only
1904 * at PD level, so we can set the dirty bit when the page is modified.
1905 *
1906 * ASSUMES that page access handlers are implemented on page table entry level.
1907 * Thus we will first catch the dirty access and set PDE.D and restart. If
1908 * there is an access handler, we'll trap again and let it work on the problem.
1909 */
1910 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1911 * As for invlpg, it simply frees the whole shadow PT.
1912 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1913 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1914 {
1915 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1916 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1917 PdeDst.n.u1Write = 0;
1918 }
1919 else
1920 {
1921 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1922 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1923 }
1924 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1925 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1926 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1927 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1928 }
1929 else
1930 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1931 }
1932# if defined(IN_RC)
1933 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1934 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1935# endif
1936 return VINF_SUCCESS;
1937 }
1938 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1939 }
1940 else
1941 {
1942 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1943 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1944 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1945 }
1946
1947 /*
1948 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1949 * Yea, I'm lazy.
1950 */
1951 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1952 ASMAtomicWriteSize(pPdeDst, 0);
1953
1954# if defined(IN_RC)
1955 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1956 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1957# endif
1958 PGM_INVL_VCPU_TLBS(pVCpu);
1959 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1960
1961#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1962 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1963 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1964 && !defined(IN_RC)
1965
1966# ifdef PGM_SYNC_N_PAGES
1967 /*
1968 * Get the shadow PDE, find the shadow page table in the pool.
1969 */
1970# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1971 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1972
1973# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1974 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1975
1976# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1977 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1978 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1979 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1980 X86PDEPAE PdeDst;
1981 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1982
1983 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1984 AssertRCSuccessReturn(rc, rc);
1985 Assert(pPDDst && pPdptDst);
1986 PdeDst = pPDDst->a[iPDDst];
1987# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1988 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1989 PEPTPD pPDDst;
1990 EPTPDE PdeDst;
1991
1992 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1993 if (rc != VINF_SUCCESS)
1994 {
1995 AssertRC(rc);
1996 return rc;
1997 }
1998 Assert(pPDDst);
1999 PdeDst = pPDDst->a[iPDDst];
2000# endif
2001 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
2002 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2003 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2004
2005 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2006 if ( cPages > 1
2007 && !(uErr & X86_TRAP_PF_P)
2008 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2009 {
2010 /*
2011 * This code path is currently only taken when the caller is PGMTrap0eHandler
2012 * for non-present pages!
2013 *
2014 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2015 * deal with locality.
2016 */
2017 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2018 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2019 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2020 iPTDst = 0;
2021 else
2022 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2023 for (; iPTDst < iPTDstEnd; iPTDst++)
2024 {
2025 if (!pPTDst->a[iPTDst].n.u1Present)
2026 {
2027 GSTPTE PteSrc;
2028
2029 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2030
2031 /* Fake the page table entry */
2032 PteSrc.u = GCPtrCurPage;
2033 PteSrc.n.u1Present = 1;
2034 PteSrc.n.u1Dirty = 1;
2035 PteSrc.n.u1Accessed = 1;
2036 PteSrc.n.u1Write = 1;
2037 PteSrc.n.u1User = 1;
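 /* With no guest paging, guest-virtual equals guest-physical, so the faked
  * PTE simply identity-maps GCPtrCurPage with full (A/D/RW/US) access. */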
2038
2039 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2040
2041 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2042 GCPtrCurPage, PteSrc.n.u1Present,
2043 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2044 PteSrc.n.u1User & PdeSrc.n.u1User,
2045 (uint64_t)PteSrc.u,
2046 (uint64_t)pPTDst->a[iPTDst].u,
2047 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2048
2049 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2050 break;
2051 }
2052 else
2053 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2054 }
2055 }
2056 else
2057# endif /* PGM_SYNC_N_PAGES */
2058 {
2059 GSTPTE PteSrc;
2060 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2061 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2062
2063 /* Fake the page table entry */
2064 PteSrc.u = GCPtrCurPage;
2065 PteSrc.n.u1Present = 1;
2066 PteSrc.n.u1Dirty = 1;
2067 PteSrc.n.u1Accessed = 1;
2068 PteSrc.n.u1Write = 1;
2069 PteSrc.n.u1User = 1;
2070 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2071
2072 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2073 GCPtrPage, PteSrc.n.u1Present,
2074 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2075 PteSrc.n.u1User & PdeSrc.n.u1User,
2076 (uint64_t)PteSrc.u,
2077 (uint64_t)pPTDst->a[iPTDst].u,
2078 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2079 }
2080 return VINF_SUCCESS;
2081
2082#else
2083 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2084 return VERR_INTERNAL_ERROR;
2085#endif
2086}
2087
2088
2089#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2090/**
2091 * Investigate page fault and handle write protection page faults caused by
2092 * dirty bit tracking.
2093 *
2094 * @returns VBox status code.
2095 * @param pVCpu The VMCPU handle.
2096 * @param uErr Page fault error code.
2097 * @param pPdeDst Shadow page directory entry.
2098 * @param pPdeSrc Guest page directory entry.
2099 * @param GCPtrPage Guest context page address.
2100 */
2101PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2102{
2103 bool fWriteProtect = !!(CPUMGetGuestCR0(pVCpu) & X86_CR0_WP);
2104 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2105 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2106# if PGM_GST_TYPE == PGM_TYPE_AMD64
2107 bool fBigPagesSupported = true;
2108# else
2109 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2110# endif
2111# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2112 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
2113# endif
2114 unsigned uPageFaultLevel;
2115 int rc;
2116 PVM pVM = pVCpu->CTX_SUFF(pVM);
2117 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2118
2119 Assert(PGMIsLockOwner(pVM));
2120
2121 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2122 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2123
2124# if PGM_GST_TYPE == PGM_TYPE_PAE \
2125 || PGM_GST_TYPE == PGM_TYPE_AMD64
2126
2127# if PGM_GST_TYPE == PGM_TYPE_AMD64
2128 PX86PML4E pPml4eSrc;
2129 PX86PDPE pPdpeSrc;
2130
2131 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2132 Assert(pPml4eSrc);
2133
2134 /*
2135 * Real page fault? (PML4E level)
2136 */
2137 if ( (uErr & X86_TRAP_PF_RSVD)
2138 || !pPml4eSrc->n.u1Present
2139 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2140 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2141 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2142 )
2143 {
2144 uPageFaultLevel = 0;
2145 goto l_UpperLevelPageFault;
2146 }
2147 Assert(pPdpeSrc);
2148
2149# else /* PAE */
2150 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2151# endif /* PAE */
2152
2153 /*
2154 * Real page fault? (PDPE level)
2155 */
2156 if ( (uErr & X86_TRAP_PF_RSVD)
2157 || !pPdpeSrc->n.u1Present
2158# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2159 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2160 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2161 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2162# endif
2163 )
2164 {
2165 uPageFaultLevel = 1;
2166 goto l_UpperLevelPageFault;
2167 }
2168# endif
2169
2170 /*
2171 * Real page fault? (PDE level)
2172 */
2173 if ( (uErr & X86_TRAP_PF_RSVD)
2174 || !pPdeSrc->n.u1Present
2175# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2176 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2177# endif
2178 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2179 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2180 {
2181 uPageFaultLevel = 2;
2182 goto l_UpperLevelPageFault;
2183 }
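 /* If we get here the guest PML4E/PDPE/PDE (as applicable) permit the access;
  * what remains is either a dirty/accessed-bit tracking fault, a violation at
  * the page table entry level, or shadow tables that are simply out of sync
  * (left for the caller to deal with). */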
2184
2185 /*
2186 * First check the easy case where the page directory has been marked read-only to track
2187 * the dirty bit of an emulated BIG page
2188 */
2189 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2190 {
2191 /* Mark guest page directory as accessed */
2192# if PGM_GST_TYPE == PGM_TYPE_AMD64
2193 pPml4eSrc->n.u1Accessed = 1;
2194 pPdpeSrc->lm.u1Accessed = 1;
2195# endif
2196 pPdeSrc->b.u1Accessed = 1;
2197
2198 /*
2199 * Only write protection page faults are relevant here.
2200 */
2201 if (fWriteFault)
2202 {
2203 /* Mark guest page directory as dirty (BIG page only). */
2204 pPdeSrc->b.u1Dirty = 1;
2205
2206 if (pPdeDst->n.u1Present)
2207 {
2208 if (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY)
2209 {
2210 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2211 Assert(pPdeSrc->b.u1Write);
2212
2213 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2214 * fault again and take this path, where the stale entry simply gets invalidated.
2215 */
2216 pPdeDst->n.u1Write = 1;
2217 pPdeDst->n.u1Accessed = 1;
2218 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2219 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2220 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2221 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2222 }
2223# ifdef IN_RING0
2224 else
2225 /* Check for stale TLB entry; only applies to the SMP guest case. */
2226 if ( pVM->cCPUs > 1
2227 && pPdeDst->n.u1Write
2228 && pPdeDst->n.u1Accessed)
2229 {
2230 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2231 if (pShwPage)
2232 {
2233 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2234 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2235 if ( pPteDst->n.u1Present
2236 && pPteDst->n.u1Write)
2237 {
2238 /* Stale TLB entry. */
2239 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2240 PGM_INVL_PG(pVCpu, GCPtrPage);
2241
2242 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2243 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2244 }
2245 }
2246 }
2247# endif /* IN_RING0 */
2248 }
2249 }
2250 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2251 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2252 }
2253 /* else: 4KB page table */
2254
2255 /*
2256 * Map the guest page table.
2257 */
2258 PGSTPT pPTSrc;
2259 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2260 if (RT_SUCCESS(rc))
2261 {
2262 /*
2263 * Real page fault?
2264 */
2265 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2266 const GSTPTE PteSrc = *pPteSrc;
2267 if ( !PteSrc.n.u1Present
2268# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2269 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2270# endif
2271 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2272 || (fUserLevelFault && !PteSrc.n.u1User)
2273 )
2274 {
2275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2276 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2277 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2278
2279 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2280 * See the 2nd case above as well.
2281 */
2282 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2283 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2284
2285 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2286 return VINF_EM_RAW_GUEST_TRAP;
2287 }
2288 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2289
2290 /*
2291 * Set the accessed bits in the page directory and the page table.
2292 */
2293# if PGM_GST_TYPE == PGM_TYPE_AMD64
2294 pPml4eSrc->n.u1Accessed = 1;
2295 pPdpeSrc->lm.u1Accessed = 1;
2296# endif
2297 pPdeSrc->n.u1Accessed = 1;
2298 pPteSrc->n.u1Accessed = 1;
2299
2300 /*
2301 * Only write protection page faults are relevant here.
2302 */
2303 if (fWriteFault)
2304 {
2305 /* Write access, so mark guest entry as dirty. */
2306# ifdef VBOX_WITH_STATISTICS
2307 if (!pPteSrc->n.u1Dirty)
2308 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2309 else
2310 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2311# endif
2312
2313 pPteSrc->n.u1Dirty = 1;
2314
2315 if (pPdeDst->n.u1Present)
2316 {
2317#ifndef IN_RING0
2318 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2319 * Our individual shadow handlers will provide more information and force a fatal exit.
2320 */
2321 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2322 {
2323 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2324 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2325 return VINF_SUCCESS;
2326 }
2327#endif
2328 /*
2329 * Map shadow page table.
2330 */
2331 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2332 if (pShwPage)
2333 {
2334 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2335 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2336 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2337 {
2338 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2339 {
2340 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2341# ifdef VBOX_STRICT
2342 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2343 if (pPage)
2344 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2345 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2346# endif
2347 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2348
2349 Assert(pPteSrc->n.u1Write);
2350
2351 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2352 * fault again and take this path, where the stale entry simply gets invalidated.
2353 */
2354 pPteDst->n.u1Write = 1;
2355 pPteDst->n.u1Dirty = 1;
2356 pPteDst->n.u1Accessed = 1;
2357 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2358 PGM_INVL_PG(pVCpu, GCPtrPage);
2359
2360 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2361 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2362 }
2363# ifdef IN_RING0
2364 else
2365 /* Check for stale TLB entry; only applies to the SMP guest case. */
2366 if ( pVM->cCPUs > 1
2367 && pPteDst->n.u1Write == 1
2368 && pPteDst->n.u1Accessed == 1)
2369 {
2370 /* Stale TLB entry. */
2371 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2372 PGM_INVL_PG(pVCpu, GCPtrPage);
2373
2374 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2375 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2376 }
2377# endif
2378 }
2379 }
2380 else
2381 AssertMsgFailed(("pgmPoolGetPage %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2382 }
2383 }
2384/** @todo Optimize accessed bit emulation? */
2385# ifdef VBOX_STRICT
2386 /*
2387 * Sanity check.
2388 */
2389 else if ( !pPteSrc->n.u1Dirty
2390 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2391 && pPdeDst->n.u1Present)
2392 {
2393 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2394 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2395 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2396 if ( pPteDst->n.u1Present
2397 && pPteDst->n.u1Write)
2398 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2399 }
2400# endif /* VBOX_STRICT */
2401 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2402 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2403 }
2404 AssertRC(rc);
2405 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2406 return rc;
2407
2408
2409l_UpperLevelPageFault:
2410 /*
2411 * Pagefault detected while checking the PML4E, PDPE or PDE.
2412 * Single exit handler to get rid of duplicate code paths.
2413 */
2414 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2415 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2416 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2417
2418 if (
2419# if PGM_GST_TYPE == PGM_TYPE_AMD64
2420 pPml4eSrc->n.u1Present &&
2421# endif
2422# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2423 pPdpeSrc->n.u1Present &&
2424# endif
2425 pPdeSrc->n.u1Present)
2426 {
2427 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2428 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2429 {
2430 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2431 }
2432 else
2433 {
2434 /*
2435 * Map the guest page table.
2436 */
2437 PGSTPT pPTSrc;
2438 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2439 if (RT_SUCCESS(rc))
2440 {
2441 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2442 const GSTPTE PteSrc = *pPteSrc;
2443 if (pPteSrc->n.u1Present)
2444 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2445 }
2446 AssertRC(rc);
2447 }
2448 }
2449 return VINF_EM_RAW_GUEST_TRAP;
2450}
2451#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2452
2453
2454/**
2455 * Sync a shadow page table.
2456 *
2457 * The shadow page table is not present. This includes the case where
2458 * there is a conflict with a mapping.
2459 *
2460 * @returns VBox status code.
2461 * @param pVCpu The VMCPU handle.
2462 * @param iPDSrc Guest page directory index.
2463 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2464 * Assume this is a temporary mapping.
2465 * @param GCPtrPage GC pointer of the page that caused the fault.
2466 */
2467PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2468{
2469 PVM pVM = pVCpu->CTX_SUFF(pVM);
2470 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2471
2472 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2473 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2474 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2475
2476 Assert(PGMIsLocked(pVM));
2477
2478#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2479 || PGM_GST_TYPE == PGM_TYPE_PAE \
2480 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2481 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2482 && PGM_SHW_TYPE != PGM_TYPE_EPT
2483
2484 int rc = VINF_SUCCESS;
2485
2486 /*
2487 * Validate input a little bit.
2488 */
2489 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2490# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2491 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2492 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2493
2494 /* Fetch the pgm pool shadow descriptor. */
2495 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2496 Assert(pShwPde);
2497
2498# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2499 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2500 PPGMPOOLPAGE pShwPde = NULL;
2501 PX86PDPAE pPDDst;
2502 PSHWPDE pPdeDst;
2503
2504 /* Fetch the pgm pool shadow descriptor. */
2505 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2506 AssertRCSuccessReturn(rc, rc);
2507 Assert(pShwPde);
2508
2509 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2510 pPdeDst = &pPDDst->a[iPDDst];
2511
2512# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2513 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2514 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2515 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2516 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2517 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2518 AssertRCSuccessReturn(rc, rc);
2519 Assert(pPDDst);
2520 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2521# endif
2522 SHWPDE PdeDst = *pPdeDst;
2523
2524# if PGM_GST_TYPE == PGM_TYPE_AMD64
2525 /* Fetch the pgm pool shadow descriptor. */
2526 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2527 Assert(pShwPde);
2528# endif
2529
2530# ifndef PGM_WITHOUT_MAPPINGS
2531 /*
2532 * Check for conflicts.
2533 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2534 * HC: Simply resolve the conflict.
2535 */
2536 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2537 {
2538 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2539# ifndef IN_RING3
2540 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2541 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2542 return VERR_ADDRESS_CONFLICT;
2543# else
2544 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2545 Assert(pMapping);
2546# if PGM_GST_TYPE == PGM_TYPE_32BIT
2547 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2548# elif PGM_GST_TYPE == PGM_TYPE_PAE
2549 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2550# else
2551 AssertFailed(); /* can't happen for amd64 */
2552# endif
2553 if (RT_FAILURE(rc))
2554 {
2555 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2556 return rc;
2557 }
2558 PdeDst = *pPdeDst;
2559# endif
2560 }
2561# else /* PGM_WITHOUT_MAPPINGS */
2562 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2563# endif /* PGM_WITHOUT_MAPPINGS */
2564 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2565
2566# if defined(IN_RC)
2567 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2568 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2569# endif
2570
2571 /*
2572 * Sync page directory entry.
2573 */
2574 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2575 if (PdeSrc.n.u1Present)
2576 {
2577 /*
2578 * Allocate & map the page table.
2579 */
2580 PSHWPT pPTDst;
2581# if PGM_GST_TYPE == PGM_TYPE_AMD64
2582 const bool fPageTable = !PdeSrc.b.u1Size;
2583# else
2584 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2585# endif
2586 PPGMPOOLPAGE pShwPage;
2587 RTGCPHYS GCPhys;
2588 if (fPageTable)
2589 {
2590 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2591# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2592 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2593 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2594# endif
2595 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2596 }
2597 else
2598 {
2599 PGMPOOLACCESS enmAccess;
2600
2601 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2602# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2603 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2604 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2605# endif
2606 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2607 if (PdeSrc.n.u1User)
2608 {
2609 if (PdeSrc.n.u1Write)
2610 enmAccess = PGMPOOLACCESS_USER_RW;
2611 else
2612 enmAccess = PGMPOOLACCESS_USER_R;
2613 }
2614 else
2615 {
2616 if (PdeSrc.n.u1Write)
2617 enmAccess = PGMPOOLACCESS_SUPERVISOR_RW;
2618 else
2619 enmAccess = PGMPOOLACCESS_SUPERVISOR_R;
2620 }
2621 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2622 }
2623 if (rc == VINF_SUCCESS)
2624 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2625 else if (rc == VINF_PGM_CACHED_PAGE)
2626 {
2627 /*
2628 * The PT was cached, just hook it up.
2629 */
2630 if (fPageTable)
2631 PdeDst.u = pShwPage->Core.Key
2632 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2633 else
2634 {
2635 PdeDst.u = pShwPage->Core.Key
2636 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2637 /* (see explanation and assumptions further down.) */
2638 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2639 {
2640 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2641 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2642 PdeDst.b.u1Write = 0;
2643 }
2644 }
2645 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2646# if defined(IN_RC)
2647 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2648# endif
2649 return VINF_SUCCESS;
2650 }
2651 else if (rc == VERR_PGM_POOL_FLUSHED)
2652 {
2653 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2654# if defined(IN_RC)
2655 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2656# endif
2657 return VINF_PGM_SYNC_CR3;
2658 }
2659 else
2660 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2661 PdeDst.u &= X86_PDE_AVL_MASK;
2662 PdeDst.u |= pShwPage->Core.Key;
2663
2664 /*
2665 * Page directory has been accessed (this is a fault situation, remember).
2666 */
2667 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2668 if (fPageTable)
2669 {
2670 /*
2671 * Page table - 4KB.
2672 *
2673 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2674 */
2675 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2676 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2677 PGSTPT pPTSrc;
2678 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2679 if (RT_SUCCESS(rc))
2680 {
2681 /*
2682 * Start by syncing the page directory entry so CSAM's TLB trick works.
2683 */
2684 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2685 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2686 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2687# if defined(IN_RC)
2688 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2689# endif
2690
2691 /*
2692 * Directory/page user or supervisor privilege: (same goes for read/write)
2693 *
2694 * Directory Page Combined
2695 * U/S U/S U/S
2696 * 0 0 0
2697 * 0 1 0
2698 * 1 0 0
2699 * 1 1 1
2700 *
2701 * Simple AND operation. Table listed for completeness.
2702 *
2703 */
2704 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2705# ifdef PGM_SYNC_N_PAGES
2706 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2707 unsigned iPTDst = iPTBase;
2708 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2709 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2710 iPTDst = 0;
2711 else
2712 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2713# else /* !PGM_SYNC_N_PAGES */
2714 unsigned iPTDst = 0;
2715 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2716# endif /* !PGM_SYNC_N_PAGES */
2717# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2718 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2719 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2720# else
2721 const unsigned offPTSrc = 0;
2722# endif
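 /* (When a 32-bit guest runs on PAE shadow paging, each 1024-entry guest page
  * table is shadowed by two 512-entry PAE page tables; offPTSrc selects which
  * half this shadow PT covers, and is 0 in all other configurations.) */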
2723 for (; iPTDst < iPTDstEnd; iPTDst++)
2724 {
2725 const unsigned iPTSrc = iPTDst + offPTSrc;
2726 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2727
2728 if (PteSrc.n.u1Present) /* we've already cleared it above */
2729 {
2730# ifndef IN_RING0
2731 /*
2732 * Assuming kernel code will be marked as supervisor - and not as user level
2733 * and executed using a conforming code selector - and marked as read-only.
2734 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2735 */
2736 PPGMPAGE pPage;
2737 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2738 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2739 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2740 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2741 )
2742# endif
2743 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2744 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2745 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2746 PteSrc.n.u1Present,
2747 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2748 PteSrc.n.u1User & PdeSrc.n.u1User,
2749 (uint64_t)PteSrc.u,
2750 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2751 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2752 }
2753 } /* for PTEs */
2754 }
2755 }
2756 else
2757 {
2758 /*
2759 * Big page - 2/4MB.
2760 *
2761 * We'll walk the ram range list in parallel and optimize lookups.
2762 * We will only sync one shadow page table at a time.
2763 */
2764 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2765
2766 /**
2767 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2768 */
2769
2770 /*
2771 * Start by syncing the page directory entry.
2772 */
2773 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2774 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2775
2776 /*
2777 * If the page is not flagged as dirty and is writable, then make it read-only
2778 * at PD level, so we can set the dirty bit when the page is modified.
2779 *
2780 * ASSUMES that page access handlers are implemented on page table entry level.
2781 * Thus we will first catch the dirty access and set PDE.D and restart. If
2782 * there is an access handler, we'll trap again and let it work on the problem.
2783 */
2784 /** @todo move the above stuff to a section in the PGM documentation. */
2785 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2786 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2787 {
2788 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2789 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2790 PdeDst.b.u1Write = 0;
2791 }
2792 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2793# if defined(IN_RC)
2794 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2795# endif
2796
2797 /*
2798 * Fill the shadow page table.
2799 */
2800 /* Get address and flags from the source PDE. */
2801 SHWPTE PteDstBase;
2802 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
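 /* PteDstBase carries the access flags inherited from the guest big-page PDE;
  * each 4KB shadow PTE below is this base OR'ed with the page's host physical
  * address (handler and allocation state permitting). */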
2803
2804 /* Loop thru the entries in the shadow PT. */
2805 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2806 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2807 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2808 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2809 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2810 unsigned iPTDst = 0;
2811 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2812 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2813 {
2814 /* Advance ram range list. */
2815 while (pRam && GCPhys > pRam->GCPhysLast)
2816 pRam = pRam->CTX_SUFF(pNext);
2817 if (pRam && GCPhys >= pRam->GCPhys)
2818 {
2819 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2820 do
2821 {
2822 /* Make shadow PTE. */
2823 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2824 SHWPTE PteDst;
2825
2826# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2827 /* Try make the page writable if necessary. */
2828 if ( PteDstBase.n.u1Write
2829 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2830 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2831 {
2832 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2833 AssertRCReturn(rc, rc);
2834 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2835 break;
2836 }
2837# endif
2838
2839 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2840 {
2841 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2842 {
2843 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2844 PteDst.n.u1Write = 0;
2845 }
2846 else
2847 PteDst.u = 0;
2848 }
2849# ifndef IN_RING0
2850 /*
2851 * Assuming kernel code will be marked as supervisor and not as user level and executed
2852 * using a conforming code selector. Don't check for readonly, as that implies the whole
2853 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2854 */
2855 else if ( !PdeSrc.n.u1User
2856 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2857 PteDst.u = 0;
2858# endif
2859 else
2860 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2861
2862 /* Only map writable pages writable. */
2863 if ( PteDst.n.u1Write
2864 && PteDst.n.u1Present
2865 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2866 {
2867 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2868 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2869 }
2870
2871# ifdef PGMPOOL_WITH_USER_TRACKING
2872 if (PteDst.n.u1Present)
2873 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2874# endif
2875 /* commit it */
2876 pPTDst->a[iPTDst] = PteDst;
2877 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2878 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2879 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2880
2881 /* advance */
2882 GCPhys += PAGE_SIZE;
2883 iHCPage++;
2884 iPTDst++;
2885 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2886 && GCPhys <= pRam->GCPhysLast);
2887 }
2888 else if (pRam)
2889 {
2890 Log(("Invalid pages at %RGp\n", GCPhys));
2891 do
2892 {
2893 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2894 GCPhys += PAGE_SIZE;
2895 iPTDst++;
2896 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2897 && GCPhys < pRam->GCPhys);
2898 }
2899 else
2900 {
2901 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2902 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2903 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2904 }
2905 } /* while more PTEs */
2906 } /* 4KB / 4MB */
2907 }
2908 else
2909 AssertRelease(!PdeDst.n.u1Present);
2910
2911 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2912 if (RT_FAILURE(rc))
2913 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2914 return rc;
2915
2916#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2917 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2918 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2919 && !defined(IN_RC)
2920
2921 /*
2922 * Validate input a little bit.
2923 */
2924 int rc = VINF_SUCCESS;
2925# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2926 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2927 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2928
2929 /* Fetch the pgm pool shadow descriptor. */
2930 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2931 Assert(pShwPde);
2932
2933# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2934 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2935 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2936 PX86PDPAE pPDDst;
2937 PSHWPDE pPdeDst;
2938
2939 /* Fetch the pgm pool shadow descriptor. */
2940 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2941 AssertRCSuccessReturn(rc, rc);
2942 Assert(pShwPde);
2943
2944 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2945 pPdeDst = &pPDDst->a[iPDDst];
2946
2947# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2948 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2949 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2950 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2951 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
2952 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2953 AssertRCSuccessReturn(rc, rc);
2954 Assert(pPDDst);
2955 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2956
2957 /* Fetch the pgm pool shadow descriptor. */
2958 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2959 Assert(pShwPde);
2960
2961# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2962 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2963 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2964 PEPTPD pPDDst;
2965 PEPTPDPT pPdptDst;
2966
2967 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2968 if (rc != VINF_SUCCESS)
2969 {
2970 AssertRC(rc);
2971 return rc;
2972 }
2973 Assert(pPDDst);
2974 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2975
2976 /* Fetch the pgm pool shadow descriptor. */
2977 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2978 Assert(pShwPde);
2979# endif
2980 SHWPDE PdeDst = *pPdeDst;
2981
2982 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2983 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2984
2985 GSTPDE PdeSrc;
2986 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2987 PdeSrc.n.u1Present = 1;
2988 PdeSrc.n.u1Write = 1;
2989 PdeSrc.n.u1Accessed = 1;
2990 PdeSrc.n.u1User = 1;
2991
2992 /*
2993 * Allocate & map the page table.
2994 */
2995 PSHWPT pPTDst;
2996 PPGMPOOLPAGE pShwPage;
2997 RTGCPHYS GCPhys;
2998
2999 /* Virtual address = physical address */
3000 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3001 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
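 /* (GCPhys is rounded down to the start of the region covered by one shadow
  * page table so that, presumably, a previously allocated pool page for the
  * same range can be reused via the cache (VINF_PGM_CACHED_PAGE).) */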
3002
3003 if ( rc == VINF_SUCCESS
3004 || rc == VINF_PGM_CACHED_PAGE)
3005 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3006 else
3007 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3008
3009 PdeDst.u &= X86_PDE_AVL_MASK;
3010 PdeDst.u |= pShwPage->Core.Key;
3011 PdeDst.n.u1Present = 1;
3012 PdeDst.n.u1Write = 1;
3013# if PGM_SHW_TYPE == PGM_TYPE_EPT
3014 PdeDst.n.u1Execute = 1;
3015# else
3016 PdeDst.n.u1User = 1;
3017 PdeDst.n.u1Accessed = 1;
3018# endif
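    /* Publish the shadow PDE with a single atomic write so that a partially
       updated entry is never observable. */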
3019 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3020
3021 pgmLock(pVM);
3022 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3023 pgmUnlock(pVM);
3024 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3025 return rc;
3026
3027#else
3028 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3029 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3030 return VERR_INTERNAL_ERROR;
3031#endif
3032}
3033
3034
3035
3036/**
3037 * Prefetch a page/set of pages.
3038 *
3039 * Typically used to sync commonly used pages before entering raw mode
3040 * after a CR3 reload.
3041 *
3042 * @returns VBox status code.
3043 * @param pVCpu The VMCPU handle.
3044 * @param GCPtrPage Page to prefetch.
3045 */
3046PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3047{
3048#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3049 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3050 /*
3051 * Check that all guest levels through the PDE are present, getting the
3052 * PD and PDE in the process.
3053 */
3054 int rc = VINF_SUCCESS;
3055# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3056# if PGM_GST_TYPE == PGM_TYPE_32BIT
3057 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3058 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3059# elif PGM_GST_TYPE == PGM_TYPE_PAE
3060 unsigned iPDSrc;
3061 X86PDPE PdpeSrc;
3062 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3063 if (!pPDSrc)
3064 return VINF_SUCCESS; /* not present */
3065# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3066 unsigned iPDSrc;
3067 PX86PML4E pPml4eSrc;
3068 X86PDPE PdpeSrc;
3069 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3070 if (!pPDSrc)
3071 return VINF_SUCCESS; /* not present */
3072# endif
3073 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3074# else
3075 PGSTPD pPDSrc = NULL;
3076 const unsigned iPDSrc = 0;
3077 GSTPDE PdeSrc;
3078
3079 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3080 PdeSrc.n.u1Present = 1;
3081 PdeSrc.n.u1Write = 1;
3082 PdeSrc.n.u1Accessed = 1;
3083 PdeSrc.n.u1User = 1;
3084# endif
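    /* The PDE faked above for guests without paging is handled exactly like a
       real guest PDE from here on. */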
3085
3086 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3087 {
3088 PVM pVM = pVCpu->CTX_SUFF(pVM);
3089 pgmLock(pVM);
3090
3091# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3092 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3093# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3094 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3095 PX86PDPAE pPDDst;
3096 X86PDEPAE PdeDst;
3097# if PGM_GST_TYPE != PGM_TYPE_PAE
3098 X86PDPE PdpeSrc;
3099
3100 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3101 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3102# endif
3103 int rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3104 if (rc != VINF_SUCCESS)
3105 {
3106 pgmUnlock(pVM);
3107 AssertRC(rc);
3108 return rc;
3109 }
3110 Assert(pPDDst);
3111 PdeDst = pPDDst->a[iPDDst];
3112
3113# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3114 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3115 PX86PDPAE pPDDst;
3116 X86PDEPAE PdeDst;
3117
3118# if PGM_GST_TYPE == PGM_TYPE_PROT
3119 /* AMD-V nested paging */
3120 X86PML4E Pml4eSrc;
3121 X86PDPE PdpeSrc;
3122 PX86PML4E pPml4eSrc = &Pml4eSrc;
3123
3124 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3125 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3126 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3127# endif
3128
3129 int rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3130 if (rc != VINF_SUCCESS)
3131 {
3132 pgmUnlock(pVM);
3133 AssertRC(rc);
3134 return rc;
3135 }
3136 Assert(pPDDst);
3137 PdeDst = pPDDst->a[iPDDst];
3138# endif
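    /* Skip hypervisor mapping PDEs; otherwise create the shadow page table if
       the PDE is not present yet, or sync just the one page if it is. */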
3139 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3140 {
3141 if (!PdeDst.n.u1Present)
3142 {
3143 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3144 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3145 }
3146 else
3147 {
3148 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3149 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3150 * makes no sense to prefetch more than one page.
3151 */
3152 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3153 if (RT_SUCCESS(rc))
3154 rc = VINF_SUCCESS;
3155 }
3156 }
3157 pgmUnlock(pVM);
3158 }
3159 return rc;
3160
3161#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3162 return VINF_SUCCESS; /* ignore */
3163#endif
3164}
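/* A rough sketch of how this worker is typically reached (an assumption about
   the dispatcher, not something defined in this file): the generic
   PGMPrefetchPage() API resolves the current shadow/guest mode pair and calls
   the matching instantiation, roughly:

       // hypothetical caller, for illustration only
       // int rc = PGM_BTH_PFN(PrefetchPage, pVCpu)(pVCpu, GCPtrPage);
*/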
3165
3166
3167
3168
3169/**
3170 * Syncs a page during a PGMVerifyAccess() call.
3171 *
3172 * @returns VBox status code (informational included).
3173 * @param pVCpu The VMCPU handle.
3174 * @param GCPtrPage The address of the page to sync.
3175 * @param fPage The effective guest page flags.
3176 * @param uErr The trap error code.
3177 */
3178PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3179{
3180 PVM pVM = pVCpu->CTX_SUFF(pVM);
3181
3182 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3183
3184 Assert(!HWACCMIsNestedPagingActive(pVM));
3185#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3186 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3187
3188# ifndef IN_RING0
3189 if (!(fPage & X86_PTE_US))
3190 {
3191 /*
3192 * Mark this page as safe.
3193 */
3194 /** @todo not correct for pages that contain both code and data!! */
3195 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3196 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3197 }
3198# endif
3199
3200 /*
3201 * Get guest PD and index.
3202 */
3203# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3204# if PGM_GST_TYPE == PGM_TYPE_32BIT
3205 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3206 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3207# elif PGM_GST_TYPE == PGM_TYPE_PAE
3208 unsigned iPDSrc = 0;
3209 X86PDPE PdpeSrc;
3210 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3211
3212 if (!pPDSrc)
3213 {
3214 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3215 return VINF_EM_RAW_GUEST_TRAP;
3216 }
3217# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3218 unsigned iPDSrc;
3219 PX86PML4E pPml4eSrc;
3220 X86PDPE PdpeSrc;
3221 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3222 if (!pPDSrc)
3223 {
3224 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3225 return VINF_EM_RAW_GUEST_TRAP;
3226 }
3227# endif
3228# else
3229 PGSTPD pPDSrc = NULL;
3230 const unsigned iPDSrc = 0;
3231# endif
3232 int rc = VINF_SUCCESS;
3233
3234 pgmLock(pVM);
3235
3236 /*
3237 * First check if the shadow pd is present.
3238 */
3239# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3240 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3241# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3242 PX86PDEPAE pPdeDst;
3243 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3244 PX86PDPAE pPDDst;
3245# if PGM_GST_TYPE != PGM_TYPE_PAE
3246 X86PDPE PdpeSrc;
3247
3248 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3249 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3250# endif
3251 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3252 if (rc != VINF_SUCCESS)
3253 {
3254 pgmUnlock(pVM);
3255 AssertRC(rc);
3256 return rc;
3257 }
3258 Assert(pPDDst);
3259 pPdeDst = &pPDDst->a[iPDDst];
3260
3261# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3262 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3263 PX86PDPAE pPDDst;
3264 PX86PDEPAE pPdeDst;
3265
3266# if PGM_GST_TYPE == PGM_TYPE_PROT
3267 /* AMD-V nested paging */
3268 X86PML4E Pml4eSrc;
3269 X86PDPE PdpeSrc;
3270 PX86PML4E pPml4eSrc = &Pml4eSrc;
3271
3272 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3273 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3274 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3275# endif
3276
3277 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3278 if (rc != VINF_SUCCESS)
3279 {
3280 pgmUnlock(pVM);
3281 AssertRC(rc);
3282 return rc;
3283 }
3284 Assert(pPDDst);
3285 pPdeDst = &pPDDst->a[iPDDst];
3286# endif
3287
3288# if defined(IN_RC)
3289 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3290 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3291# endif
3292
3293 if (!pPdeDst->n.u1Present)
3294 {
3295 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3296 if (rc != VINF_SUCCESS)
3297 {
3298# if defined(IN_RC)
3299 /* Release the dynamic pPdeDst mapping again. */
3300 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3301# endif
3302 pgmUnlock(pVM);
3303 AssertRC(rc);
3304 return rc;
3305 }
3306 }
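    /* When guest paging is active, a fault caused only by lazy dirty-bit
       tracking is fixed up by CheckPageFault below and reported as handled;
       otherwise the page is resynced with SyncPage. */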
3307
3308# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3309 /* Check for dirty bit fault */
3310 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3311 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3312 Log(("PGMVerifyAccess: success (dirty)\n"));
3313 else
3314 {
3315 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3316# else
3317 {
3318 GSTPDE PdeSrc;
3319 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3320 PdeSrc.n.u1Present = 1;
3321 PdeSrc.n.u1Write = 1;
3322 PdeSrc.n.u1Accessed = 1;
3323 PdeSrc.n.u1User = 1;
3324
3325# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3326 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3327 if (uErr & X86_TRAP_PF_US)
3328 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3329 else /* supervisor */
3330 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3331
3332 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3333 if (RT_SUCCESS(rc))
3334 {
3335 /* Page was successfully synced */
3336 Log2(("PGMVerifyAccess: success (sync)\n"));
3337 rc = VINF_SUCCESS;
3338 }
3339 else
3340 {
3341 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3342 rc = VINF_EM_RAW_GUEST_TRAP;
3343 }
3344 }
3345# if defined(IN_RC)
3346 /* Release the dynamic pPdeDst mapping again. */
3347 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3348# endif
3349 pgmUnlock(pVM);
3350 return rc;
3351
3352#else /* PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT */
3353
3354 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3355 return VERR_INTERNAL_ERROR;
3356#endif /* PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT */
3357}
3358
3359#undef MY_STAM_COUNTER_INC
3360#define MY_STAM_COUNTER_INC(a) do { } while (0)
3361
3362
3363/**
3364 * Syncs the paging hierarchy starting at CR3.
3365 *
3366 * @returns VBox status code, no specials.
3367 * @param pVCpu The VMCPU handle.
3368 * @param cr0 Guest context CR0 register
3369 * @param cr3 Guest context CR3 register
3370 * @param cr4 Guest context CR4 register
3371 * @param fGlobal Including global page directories or not
3372 */
3373PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3374{
3375 PVM pVM = pVCpu->CTX_SUFF(pVM);
3376
3377 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
3378 fGlobal = true; /* Change this CR3 reload to be a global one. */
3379
3380 LogFlow(("SyncCR3 %d\n", fGlobal));
3381
3382#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3383 /*
3384 * Update page access handlers.
3385 * The virtual handlers are always flushed, while the physical handlers are flushed only on demand.
3386 * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
3387 * have to look into that later because it has a bad influence on performance.
3388 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3389 * bird: Yes, but that won't work for aliases.
3390 */
3391 /** @todo this MUST go away. See #1557. */
3392 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3393 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3394 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3395#endif
3396
3397#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3398 /*
3399 * Nested / EPT - almost no work.
3400 */
3401 /** @todo check if this is really necessary; the call does it as well... */
3402 HWACCMFlushTLB(pVCpu);
3403 return VINF_SUCCESS;
3404
3405#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3406 /*
3407 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3408 * out the shadow parts when the guest modifies its tables.
3409 */
3410 return VINF_SUCCESS;
3411
3412#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3413
3414# ifdef PGM_WITHOUT_MAPPINGS
3415 Assert(pVM->pgm.s.fMappingsFixed);
3416 return VINF_SUCCESS;
3417# else
3418 /* Nothing to do when mappings are fixed. */
3419 if (pVM->pgm.s.fMappingsFixed)
3420 return VINF_SUCCESS;
3421
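    /* Only conflicts between guest usage and the hypervisor mappings need to be
       resolved here; the shadow page tables themselves are resynced lazily on
       demand (page faults / InvalidatePage). */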
3422 int rc = PGMMapResolveConflicts(pVM);
3423 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3424 if (rc == VINF_PGM_SYNC_CR3)
3425 {
3426 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3427 return VINF_PGM_SYNC_CR3;
3428 }
3429# endif
3430 return VINF_SUCCESS;
3431#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3432}
3433
3434
3435
3436
3437#ifdef VBOX_STRICT
3438#ifdef IN_RC
3439# undef AssertMsgFailed
3440# define AssertMsgFailed Log
3441#endif
3442#ifdef IN_RING3
3443# include <VBox/dbgf.h>
3444
3445/**
3446 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3447 *
3448 * @returns VBox status code (VINF_SUCCESS).
3449 * @param cr3 The root of the hierarchy.
3450 * @param cr4 The cr4 register value; only the PAE and PSE flags are currently used.
3451 * @param fLongMode Set if long mode, false if not long mode.
3452 * @param cMaxDepth Number of levels to dump.
3453 * @param pHlp Pointer to the output functions.
3454 */
3455RT_C_DECLS_BEGIN
3456VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3457RT_C_DECLS_END
3458
3459#endif
3460
3461/**
3462 * Checks that the shadow page table is in sync with the guest one.
3463 *
3464 * @returns The number of errors.
3466 * @param pVCpu The VMCPU handle.
3467 * @param cr3 Guest context CR3 register
3468 * @param cr4 Guest context CR4 register
3469 * @param GCPtr Where to start. Defaults to 0.
3470 * @param cb How much to check. Defaults to everything.
3471 */
3472PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3473{
3474#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3475 return 0;
3476#else
3477 unsigned cErrors = 0;
3478 PVM pVM = pVCpu->CTX_SUFF(pVM);
3479 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3480
3481#if PGM_GST_TYPE == PGM_TYPE_PAE
3482 /** @todo currently broken; crashes below somewhere */
3483 AssertFailed();
3484#endif
3485
3486#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3487 || PGM_GST_TYPE == PGM_TYPE_PAE \
3488 || PGM_GST_TYPE == PGM_TYPE_AMD64
3489
3490# if PGM_GST_TYPE == PGM_TYPE_AMD64
3491 bool fBigPagesSupported = true;
3492# else
3493 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
3494# endif
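    /* Big (2/4 MB) guest pages are always considered available for long-mode
       guests; otherwise their use depends on the guest's CR4.PSE setting. */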
3495 PPGMCPU pPGM = &pVCpu->pgm.s;
3496 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3497 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3498# ifndef IN_RING0
3499 RTHCPHYS HCPhys; /* general usage. */
3500# endif
3501 int rc;
3502
3503 /*
3504 * Check that the Guest CR3 and all its mappings are correct.
3505 */
3506 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3507 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3508 false);
3509# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3510# if PGM_GST_TYPE == PGM_TYPE_32BIT
3511 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3512# else
3513 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3514# endif
3515 AssertRCReturn(rc, 1);
3516 HCPhys = NIL_RTHCPHYS;
3517 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3518 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3519# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3520 pgmGstGet32bitPDPtr(pPGM);
3521 RTGCPHYS GCPhys;
3522 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3523 AssertRCReturn(rc, 1);
3524 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3525# endif
3526# endif /* !IN_RING0 */
3527
3528 /*
3529 * Get and check the Shadow CR3.
3530 */
3531# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3532 unsigned cPDEs = X86_PG_ENTRIES;
3533 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3534# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3535# if PGM_GST_TYPE == PGM_TYPE_32BIT
3536 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3537# else
3538 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3539# endif
3540 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3541# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3542 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3543 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3544# endif
3545 if (cb != ~(RTGCPTR)0)
3546 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3547
3548/** @todo call the other two PGMAssert*() functions. */
3549
3550# if PGM_GST_TYPE == PGM_TYPE_AMD64
3551 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3552
3553 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3554 {
3555 PPGMPOOLPAGE pShwPdpt = NULL;
3556 PX86PML4E pPml4eSrc;
3557 PX86PML4E pPml4eDst;
3558 RTGCPHYS GCPhysPdptSrc;
3559
3560 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3561 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3562
3563 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3564 if (!pPml4eDst->n.u1Present)
3565 {
3566 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3567 continue;
3568 }
3569
3570 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3571 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3572
3573 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3574 {
3575 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3576 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3577 cErrors++;
3578 continue;
3579 }
3580
3581 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3582 {
3583 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3584 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3585 cErrors++;
3586 continue;
3587 }
3588
3589 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3590 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3591 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3592 {
3593 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3594 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3595 cErrors++;
3596 continue;
3597 }
3598# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3599 {
3600# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3601
3602# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3603 /*
3604 * Check the PDPTEs too.
3605 */
3606 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3607
3608 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3609 {
3610 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3611 PPGMPOOLPAGE pShwPde = NULL;
3612 PX86PDPE pPdpeDst;
3613 RTGCPHYS GCPhysPdeSrc;
3614# if PGM_GST_TYPE == PGM_TYPE_PAE
3615 X86PDPE PdpeSrc;
3616 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3617 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3618# else
3619 PX86PML4E pPml4eSrc;
3620 X86PDPE PdpeSrc;
3621 PX86PDPT pPdptDst;
3622 PX86PDPAE pPDDst;
3623 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3624
3625 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3626 if (rc != VINF_SUCCESS)
3627 {
3628 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3629 GCPtr += 512 * _2M;
3630 continue; /* next PDPTE */
3631 }
3632 Assert(pPDDst);
3633# endif
3634 Assert(iPDSrc == 0);
3635
3636 pPdpeDst = &pPdptDst->a[iPdpt];
3637
3638 if (!pPdpeDst->n.u1Present)
3639 {
3640 GCPtr += 512 * _2M;
3641 continue; /* next PDPTE */
3642 }
3643
3644 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3645 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3646
3647 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3648 {
3649 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3650 GCPtr += 512 * _2M;
3651 cErrors++;
3652 continue;
3653 }
3654
3655 if (GCPhysPdeSrc != pShwPde->GCPhys)
3656 {
3657# if PGM_GST_TYPE == PGM_TYPE_AMD64
3658 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3659# else
3660 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3661# endif
3662 GCPtr += 512 * _2M;
3663 cErrors++;
3664 continue;
3665 }
3666
3667# if PGM_GST_TYPE == PGM_TYPE_AMD64
3668 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3669 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3670 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3671 {
3672 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3673 GCPtr += 512 * _2M;
3674 cErrors++;
3675 continue;
3676 }
3677# endif
3678
3679# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3680 {
3681# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3682# if PGM_GST_TYPE == PGM_TYPE_32BIT
3683 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3684# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3685 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3686# endif
3687# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3688 /*
3689 * Iterate the shadow page directory.
3690 */
3691 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3692 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3693
3694 for (;
3695 iPDDst < cPDEs;
3696 iPDDst++, GCPtr += cIncrement)
3697 {
3698# if PGM_SHW_TYPE == PGM_TYPE_PAE
3699 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3700# else
3701 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3702# endif
3703 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3704 {
3705 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3706 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3707 {
3708 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3709 cErrors++;
3710 continue;
3711 }
3712 }
3713 else if ( (PdeDst.u & X86_PDE_P)
3714 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3715 )
3716 {
3717 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3718 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3719 if (!pPoolPage)
3720 {
3721 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3722 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3723 cErrors++;
3724 continue;
3725 }
3726 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3727
3728 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3729 {
3730 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3731 GCPtr, (uint64_t)PdeDst.u));
3732 cErrors++;
3733 }
3734
3735 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3736 {
3737 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3738 GCPtr, (uint64_t)PdeDst.u));
3739 cErrors++;
3740 }
3741
3742 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3743 if (!PdeSrc.n.u1Present)
3744 {
3745 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3746 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3747 cErrors++;
3748 continue;
3749 }
3750
3751 if ( !PdeSrc.b.u1Size
3752 || !fBigPagesSupported)
3753 {
3754 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3755# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3756 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3757# endif
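                 /* With a PAE shadow over a 32-bit guest, each 4 KB guest page
                    table is split across two shadow page tables, so odd shadow
                    PDEs refer to the upper 2 KB half of the guest PT. */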
3758 }
3759 else
3760 {
3761# if PGM_GST_TYPE == PGM_TYPE_32BIT
3762 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3763 {
3764 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3765 GCPtr, (uint64_t)PdeSrc.u));
3766 cErrors++;
3767 continue;
3768 }
3769# endif
3770 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3771# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3772 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3773# endif
3774 }
3775
3776 if ( pPoolPage->enmKind
3777 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3778 {
3779 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3780 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3781 cErrors++;
3782 }
3783
3784 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3785 if (!pPhysPage)
3786 {
3787 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3788 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3789 cErrors++;
3790 continue;
3791 }
3792
3793 if (GCPhysGst != pPoolPage->GCPhys)
3794 {
3795 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3796 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3797 cErrors++;
3798 continue;
3799 }
3800
3801 if ( !PdeSrc.b.u1Size
3802 || !fBigPagesSupported)
3803 {
3804 /*
3805 * Page Table.
3806 */
3807 const GSTPT *pPTSrc;
3808 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3809 if (RT_FAILURE(rc))
3810 {
3811 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3812 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3813 cErrors++;
3814 continue;
3815 }
3816 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3817 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3818 {
3819 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3820 // (This problem will go away when/if we shadow multiple CR3s.)
3821 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3822 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3823 cErrors++;
3824 continue;
3825 }
3826 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3827 {
3828 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3829 GCPtr, (uint64_t)PdeDst.u));
3830 cErrors++;
3831 continue;
3832 }
3833
3834 /* iterate the page table. */
3835# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3836 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3837 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3838# else
3839 const unsigned offPTSrc = 0;
3840# endif
3841 for (unsigned iPT = 0, off = 0;
3842 iPT < RT_ELEMENTS(pPTDst->a);
3843 iPT++, off += PAGE_SIZE)
3844 {
3845 const SHWPTE PteDst = pPTDst->a[iPT];
3846
3847 /* skip not-present entries. */
3848 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3849 continue;
3850 Assert(PteDst.n.u1Present);
3851
3852 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3853 if (!PteSrc.n.u1Present)
3854 {
3855# ifdef IN_RING3
3856 PGMAssertHandlerAndFlagsInSync(pVM);
3857 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3858# endif
3859 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3860 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3861 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3862 cErrors++;
3863 continue;
3864 }
3865
3866 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3867# if 1 /** @todo sync accessed bit properly... */
3868 fIgnoreFlags |= X86_PTE_A;
3869# endif
3870
3871 /* match the physical addresses */
3872 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3873 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3874
3875# ifdef IN_RING3
3876 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3877 if (RT_FAILURE(rc))
3878 {
3879 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3880 {
3881 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3882 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3883 cErrors++;
3884 continue;
3885 }
3886 }
3887 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3888 {
3889 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3890 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3891 cErrors++;
3892 continue;
3893 }
3894# endif
3895
3896 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3897 if (!pPhysPage)
3898 {
3899# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3900 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3901 {
3902 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3903 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3904 cErrors++;
3905 continue;
3906 }
3907# endif
3908 if (PteDst.n.u1Write)
3909 {
3910 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3911 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3912 cErrors++;
3913 }
3914 fIgnoreFlags |= X86_PTE_RW;
3915 }
3916 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3917 {
3918 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3919 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3920 cErrors++;
3921 continue;
3922 }
3923
3924 /* flags */
3925 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3926 {
3927 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3928 {
3929 if (PteDst.n.u1Write)
3930 {
3931 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3932 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3933 cErrors++;
3934 continue;
3935 }
3936 fIgnoreFlags |= X86_PTE_RW;
3937 }
3938 else
3939 {
3940 if (PteDst.n.u1Present)
3941 {
3942 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3943 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3944 cErrors++;
3945 continue;
3946 }
3947 fIgnoreFlags |= X86_PTE_P;
3948 }
3949 }
3950 else
3951 {
3952 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3953 {
3954 if (PteDst.n.u1Write)
3955 {
3956 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3957 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3958 cErrors++;
3959 continue;
3960 }
3961 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3962 {
3963 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3964 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3965 cErrors++;
3966 continue;
3967 }
3968 if (PteDst.n.u1Dirty)
3969 {
3970 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3971 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3972 cErrors++;
3973 }
3974# if 0 /** @todo sync access bit properly... */
3975 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3976 {
3977 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3978 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3979 cErrors++;
3980 }
3981 fIgnoreFlags |= X86_PTE_RW;
3982# else
3983 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3984# endif
3985 }
3986 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3987 {
3988 /* access bit emulation (not implemented). */
3989 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3990 {
3991 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3992 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3993 cErrors++;
3994 continue;
3995 }
3996 if (!PteDst.n.u1Accessed)
3997 {
3998 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3999 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4000 cErrors++;
4001 }
4002 fIgnoreFlags |= X86_PTE_P;
4003 }
4004# ifdef DEBUG_sandervl
4005 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4006# endif
4007 }
4008
4009 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4010 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4011 )
4012 {
4013 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4014 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4015 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4016 cErrors++;
4017 continue;
4018 }
4019 } /* foreach PTE */
4020 }
4021 else
4022 {
4023 /*
4024 * Big Page.
4025 */
4026 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4027 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4028 {
4029 if (PdeDst.n.u1Write)
4030 {
4031 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4032 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4033 cErrors++;
4034 continue;
4035 }
4036 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4037 {
4038 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4039 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4040 cErrors++;
4041 continue;
4042 }
4043# if 0 /** @todo sync access bit properly... */
4044 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4045 {
4046 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4047 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4048 cErrors++;
4049 }
4050 fIgnoreFlags |= X86_PTE_RW;
4051# else
4052 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4053# endif
4054 }
4055 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4056 {
4057 /* access bit emulation (not implemented). */
4058 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4059 {
4060 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4061 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4062 cErrors++;
4063 continue;
4064 }
4065 if (!PdeDst.n.u1Accessed)
4066 {
4067 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4068 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4069 cErrors++;
4070 }
4071 fIgnoreFlags |= X86_PTE_P;
4072 }
4073
4074 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4075 {
4076 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4077 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4078 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4079 cErrors++;
4080 }
4081
4082 /* iterate the page table. */
4083 for (unsigned iPT = 0, off = 0;
4084 iPT < RT_ELEMENTS(pPTDst->a);
4085 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4086 {
4087 const SHWPTE PteDst = pPTDst->a[iPT];
4088
4089 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4090 {
4091 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4092 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4093 cErrors++;
4094 }
4095
4096 /* skip not-present entries. */
4097 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4098 continue;
4099
4100 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
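                    /* For 2/4 MB guest pages the shadow PTEs are compared against
                       the guest PDE, so per-page attributes with no direct
                       counterpart (D, A, G, PAT, caching bits, NX) are masked out. */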
4101
4102 /* match the physical addresses */
4103 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4104
4105# ifdef IN_RING3
4106 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4107 if (RT_FAILURE(rc))
4108 {
4109 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4110 {
4111 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4112 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4113 cErrors++;
4114 }
4115 }
4116 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4117 {
4118 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4119 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4120 cErrors++;
4121 continue;
4122 }
4123# endif
4124 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4125 if (!pPhysPage)
4126 {
4127# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4128 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4129 {
4130 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4131 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4132 cErrors++;
4133 continue;
4134 }
4135# endif
4136 if (PteDst.n.u1Write)
4137 {
4138 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4139 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4140 cErrors++;
4141 }
4142 fIgnoreFlags |= X86_PTE_RW;
4143 }
4144 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4145 {
4146 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4147 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4148 cErrors++;
4149 continue;
4150 }
4151
4152 /* flags */
4153 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4154 {
4155 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4156 {
4157 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4158 {
4159 if (PteDst.n.u1Write)
4160 {
4161 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4162 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4163 cErrors++;
4164 continue;
4165 }
4166 fIgnoreFlags |= X86_PTE_RW;
4167 }
4168 }
4169 else
4170 {
4171 if (PteDst.n.u1Present)
4172 {
4173 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4174 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4175 cErrors++;
4176 continue;
4177 }
4178 fIgnoreFlags |= X86_PTE_P;
4179 }
4180 }
4181
4182 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4183 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4184 )
4185 {
4186 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4187 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4188 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4189 cErrors++;
4190 continue;
4191 }
4192 } /* for each PTE */
4193 }
4194 }
4195 /* not present */
4196
4197 } /* for each PDE */
4198
4199 } /* for each PDPTE */
4200
4201 } /* for each PML4E */
4202
4203# ifdef DEBUG
4204 if (cErrors)
4205 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4206# endif
4207
4208#endif /* GST == 32BIT, PAE or AMD64 */
4209 return cErrors;
4210
4211#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4212}
4213#endif /* VBOX_STRICT */
4214
4215
4216/**
4217 * Sets up the CR3 for shadow paging
4218 *
4219 * @returns Strict VBox status code.
4220 * @retval VINF_SUCCESS.
4221 *
4222 * @param pVCpu The VMCPU handle.
4223 * @param GCPhysCR3 The physical address in the CR3 register.
4224 */
4225PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4226{
4227 PVM pVM = pVCpu->CTX_SUFF(pVM);
4228
4229 /* Update guest paging info. */
4230#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4231 || PGM_GST_TYPE == PGM_TYPE_PAE \
4232 || PGM_GST_TYPE == PGM_TYPE_AMD64
4233
4234 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4235
4236 /*
4237 * Map the page CR3 points at.
4238 */
4239 RTHCPTR HCPtrGuestCR3;
4240 RTHCPHYS HCPhysGuestCR3;
4241 pgmLock(pVM);
4242 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4243 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4244 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4245 /** @todo this needs some reworking wrt. locking. */
4246# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4247 HCPtrGuestCR3 = NIL_RTHCPTR;
4248 int rc = VINF_SUCCESS;
4249# else
4250 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4251# endif
4252 pgmUnlock(pVM);
4253 if (RT_SUCCESS(rc))
4254 {
4255 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4256 if (RT_SUCCESS(rc))
4257 {
4258# ifdef IN_RC
4259 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4260# endif
4261# if PGM_GST_TYPE == PGM_TYPE_32BIT
4262 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4263# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4264 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4265# endif
4266 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4267
4268# elif PGM_GST_TYPE == PGM_TYPE_PAE
4269 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4270 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4271# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4272 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4273# endif
4274 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4275 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4276
4277 /*
4278 * Map the 4 PDs too.
4279 */
4280 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4281 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4282 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4283 {
4284 if (pGuestPDPT->a[i].n.u1Present)
4285 {
4286 RTHCPTR HCPtr;
4287 RTHCPHYS HCPhys;
4288 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4289 pgmLock(pVM);
4290 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4291 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4292 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4293# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4294 HCPtr = NIL_RTHCPTR;
4295 int rc2 = VINF_SUCCESS;
4296# else
4297 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4298# endif
4299 pgmUnlock(pVM);
4300 if (RT_SUCCESS(rc2))
4301 {
4302 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4303 AssertRCReturn(rc, rc);
4304
4305 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4306# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4307 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4308# endif
4309 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4310 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4311# ifdef IN_RC
4312 PGM_INVL_PG(pVCpu, GCPtr);
4313# endif
4314 continue;
4315 }
4316 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4317 }
4318
4319 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4320# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4321 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4322# endif
4323 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4324 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4325# ifdef IN_RC
4326 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4327# endif
4328 }
4329
4330# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4331 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4332# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4333 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4334# endif
4335# endif
4336 }
4337 else
4338 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4339 }
4340 else
4341 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4342
4343#else /* prot/real stub */
4344 int rc = VINF_SUCCESS;
4345#endif
4346
4347 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4348# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4349 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4350 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4351 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4352 && PGM_GST_TYPE != PGM_TYPE_PROT))
4353
4354 Assert(!HWACCMIsNestedPagingActive(pVM));
4355
4356 /*
4357 * Update the shadow root page as well since that's not fixed.
4358 */
4359 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4360 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4361 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4362 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4363 PPGMPOOLPAGE pNewShwPageCR3;
4364
4365 pgmLock(pVM);
4366
4367 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4368 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4369 AssertFatalRC(rc);
4370 rc = VINF_SUCCESS;
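    /* The new root was allocated with the 'lock page' flag so the pool will not
       reclaim the active CR3 page; the old root is unlocked and freed further
       down once the switch is complete. */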
4371
4372# ifdef IN_RC
4373 /*
4374 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4375 * state will be inconsistent! Flush important things now while
4376 * we still can and then make sure there are no ring-3 calls.
4377 */
4378 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4379 VMMRZCallRing3Disable(pVCpu);
4380# endif
4381
4382 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4383 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4384 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4385# ifdef IN_RING0
4386 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4387 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4388# elif defined(IN_RC)
4389 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4390 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4391# else
4392 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4393 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4394# endif
4395
4396# ifndef PGM_WITHOUT_MAPPINGS
4397 /*
4398 * Apply all hypervisor mappings to the new CR3.
4399 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4400 * make sure we check for conflicts in the new CR3 root.
4401 */
4402# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4403 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4404# endif
4405 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4406 AssertRCReturn(rc, rc);
4407# endif
4408
4409 /* Set the current hypervisor CR3. */
4410 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4411 SELMShadowCR3Changed(pVM, pVCpu);
4412
4413# ifdef IN_RC
4414 /* NOTE: The state is consistent again. */
4415 VMMRZCallRing3Enable(pVCpu);
4416# endif
4417
4418 /* Clean up the old CR3 root. */
4419 if ( pOldShwPageCR3
4420 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4421 {
4422 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4423# ifndef PGM_WITHOUT_MAPPINGS
4424 /* Remove the hypervisor mappings from the shadow page table. */
4425 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4426# endif
4427 /* Mark the page as unlocked; allow flushing again. */
4428 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4429
4430 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4431 }
4432 pgmUnlock(pVM);
4433# endif
4434
4435 return rc;
4436}
4437
4438/**
4439 * Unmaps the shadow CR3.
4440 *
4441 * @returns VBox status, no specials.
4442 * @param pVCpu The VMCPU handle.
4443 */
4444PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4445{
4446 LogFlow(("UnmapCR3\n"));
4447
4448 int rc = VINF_SUCCESS;
4449 PVM pVM = pVCpu->CTX_SUFF(pVM);
4450
4451 /*
4452 * Update guest paging info.
4453 */
4454#if PGM_GST_TYPE == PGM_TYPE_32BIT
4455 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4456# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4457 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4458# endif
4459 pVCpu->pgm.s.pGst32BitPdRC = 0;
4460
4461#elif PGM_GST_TYPE == PGM_TYPE_PAE
4462 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4463# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4464 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4465# endif
4466 pVCpu->pgm.s.pGstPaePdptRC = 0;
4467 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4468 {
4469 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4470# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4471 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4472# endif
4473 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4474 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4475 }
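    /* All cached guest PAE PD pointers are dropped here so no stale mapping can
       be used after the CR3 has been unmapped; they are re-established by the
       next MapCR3. */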
4476
4477#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4478 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4479# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4480 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4481# endif
4482
4483#else /* prot/real mode stub */
4484 /* nothing to do */
4485#endif
4486
4487#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4488 /*
4489 * Update shadow paging info.
4490 */
4491# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4492 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4493 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4494
4495# if PGM_GST_TYPE != PGM_TYPE_REAL
4496 Assert(!HWACCMIsNestedPagingActive(pVM));
4497# endif
4498
4499 pgmLock(pVM);
4500
4501# ifndef PGM_WITHOUT_MAPPINGS
4502 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4503 /* Remove the hypervisor mappings from the shadow page table. */
4504 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4505# endif
4506
4507 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4508 {
4509 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4510
4511 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4512
4513 /* Mark the page as unlocked; allow flushing again. */
4514 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4515
4516 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4517 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4518 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4519 pVCpu->pgm.s.pShwPageCR3RC = 0;
4520 pVCpu->pgm.s.iShwUser = 0;
4521 pVCpu->pgm.s.iShwUserTable = 0;
4522 }
4523 pgmUnlock(pVM);
4524# endif
4525#endif /* !IN_RC*/
4526
4527 return rc;
4528}
4529