VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 21695

Last change on this file since 21695 was 21175, checked in by vboxsync, 16 years ago

Don't release the PGM lock when calling the PGM pool handler. (pointless as we'll request the lock again immediately)
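That change shows up further down in this file as a conditional unlock in the physical-handler dispatch path: the PGM lock is only dropped for handlers other than the pool's own access handler. A minimal sketch of the pattern, using the names as they appear in the Trap0eHandler code below (surrounding declarations trimmed):

    /* Only drop the PGM lock for handlers other than the pool's own access
       handler -- the pool handler would just re-acquire it straight away. */
    PPGMPOOL pPool      = pVM->pgm.s.CTX_SUFF(pPool);
    bool     fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
    if (fLeaveLock)
        pgmUnlock(pVM);
    rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
    if (fLeaveLock)
        pgmLock(pVM);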

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 195.4 KB
1/* $Id: PGMAllBth.h 21175 2009-07-02 15:59:21Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
44RT_C_DECLS_END
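/*
 * Note: this file is a template.  It is included several times by the PGM
 * code with different PGM_GST_TYPE / PGM_SHW_TYPE settings, and the
 * PGM_BTH_DECL / PGM_BTH_NAME macros mangle each function name per
 * guest+shadow mode pair.  The real macro definitions live in the including
 * source; the lines below are only a rough sketch of the idea, not the
 * actual VirtualBox definitions:
 *
 *     #define PGM_BTH_NAME(name)       pgmBth32Bit32Bit##name      // one instantiation
 *     #define PGM_BTH_DECL(type, name) type PGM_BTH_NAME(name)
 *
 *     #define PGM_GST_TYPE PGM_TYPE_32BIT
 *     #define PGM_SHW_TYPE PGM_TYPE_32BIT
 *     #include "PGMAllBth.h"           // emits pgmBth32Bit32BitTrap0eHandler(), ...
 */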
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
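/*
 * Summary of the guest -> shadow combinations the checks above allow
 * (derived from the #error conditions, listed here for readability):
 *     REAL   -> 32BIT, PAE, NESTED or EPT
 *     PROT   -> 32BIT, PAE, AMD64, NESTED or EPT
 *     32BIT  -> 32BIT, PAE, NESTED or EPT
 *     PAE    -> PAE, NESTED or EPT
 *     AMD64  -> AMD64, NESTED or EPT
 */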
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 *
78 * @param pVCpu VMCPU Handle.
79 * @param uErr The trap error code.
80 * @param pRegFrame Trap register frame.
81 * @param pvFault The fault address.
82 */
83PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
84{
85 PVM pVM = pVCpu->CTX_SUFF(pVM);
86
87# if defined(IN_RC) && defined(VBOX_STRICT)
88 PGMDynCheckLocks(pVM);
89# endif
90
91# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
92 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
93 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
94
95# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
96 /*
97 * Hide the instruction fetch trap indicator for now.
98 */
99 /** @todo NXE will change this and we must fix NXE in the switcher too! */
100 if (uErr & X86_TRAP_PF_ID)
101 {
102 uErr &= ~X86_TRAP_PF_ID;
103 TRPMSetErrorCode(pVCpu, uErr);
104 }
105# endif
106
107 /*
108 * Get PDs.
109 */
110 int rc;
111# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
112# if PGM_GST_TYPE == PGM_TYPE_32BIT
113 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
114 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
115
116# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
117
118# if PGM_GST_TYPE == PGM_TYPE_PAE
119 unsigned iPDSrc = 0; /* initialized to shut up gcc */
120 X86PDPE PdpeSrc;
121 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
122
123# elif PGM_GST_TYPE == PGM_TYPE_AMD64
124 unsigned iPDSrc = 0; /* initialized to shut up gcc */
125 PX86PML4E pPml4eSrc;
126 X86PDPE PdpeSrc;
127 PGSTPD pPDSrc;
128
129 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
130 Assert(pPml4eSrc);
131# endif
132
133 /* Quick check for a valid guest trap. (PAE & AMD64) */
134 if (!pPDSrc)
135 {
136# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
137 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# else
139 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
140# endif
141 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
142 TRPMSetErrorCode(pVCpu, uErr);
143 return VINF_EM_RAW_GUEST_TRAP;
144 }
145# endif
146
147# else /* !PGM_WITH_PAGING */
148 PGSTPD pPDSrc = NULL;
149 const unsigned iPDSrc = 0;
150# endif /* !PGM_WITH_PAGING */
151
152 /* Fetch the guest PDE */
153# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
154 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
155# else
156 GSTPDE PdeSrc;
157 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
158 PdeSrc.n.u1Present = 1;
159 PdeSrc.n.u1Write = 1;
160 PdeSrc.n.u1Accessed = 1;
161 PdeSrc.n.u1User = 1;
162# endif
163
164# if PGM_SHW_TYPE == PGM_TYPE_32BIT
165 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
166 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
167
168# elif PGM_SHW_TYPE == PGM_TYPE_PAE
169 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
170
171 PX86PDPAE pPDDst;
172# if PGM_GST_TYPE != PGM_TYPE_PAE
173 X86PDPE PdpeSrc;
174
175 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
176 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
177# endif
178 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
179 if (rc != VINF_SUCCESS)
180 {
181 AssertRC(rc);
182 return rc;
183 }
184 Assert(pPDDst);
185
186# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
187 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
188 PX86PDPAE pPDDst;
189# if PGM_GST_TYPE == PGM_TYPE_PROT
190 /* AMD-V nested paging */
191 X86PML4E Pml4eSrc;
192 X86PDPE PdpeSrc;
193 PX86PML4E pPml4eSrc = &Pml4eSrc;
194
195 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
196 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
197 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
198# endif
199
200 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
201 if (rc != VINF_SUCCESS)
202 {
203 AssertRC(rc);
204 return rc;
205 }
206 Assert(pPDDst);
207
208# elif PGM_SHW_TYPE == PGM_TYPE_EPT
209 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
210 PEPTPD pPDDst;
211
212 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
213 if (rc != VINF_SUCCESS)
214 {
215 AssertRC(rc);
216 return rc;
217 }
218 Assert(pPDDst);
219# endif
220
221# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
222 /*
223 * If we successfully correct the write protection fault due to dirty bit
224 * tracking, or this page fault is a genuine one, then return immediately.
225 */
226 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
227 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
228 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
229 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
230 || rc == VINF_EM_RAW_GUEST_TRAP)
231 {
232 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
233 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
234 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
235 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
236 }
237
238 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
239# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
240
241 /*
242 * A common case is the not-present error caused by lazy page table syncing.
243 *
244 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
245 * so we can safely assume that the shadow PT is present when calling SyncPage later.
246 *
247 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
248 * of mapping conflict and defer to SyncCR3 in R3.
249 * (Again, we do NOT support access handlers for non-present guest pages.)
250 *
251 */
252 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
253 && !pPDDst->a[iPDDst].n.u1Present
254 && PdeSrc.n.u1Present
255 )
256 {
257 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
258 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
259 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
260 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
261 if (RT_SUCCESS(rc))
262 {
263 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
264 return rc;
265 }
266 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
267 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
268 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
269 return VINF_PGM_SYNC_CR3;
270 }
271
272# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
273 /*
274 * Check if this address is within any of our mappings.
275 *
276 * This is *very* fast and it's gonna save us a bit of effort below and prevent
277 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
278 * (BTW, it's impossible to have physical access handlers in a mapping.)
279 */
280 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
281 {
282 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
283 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
284 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
285 {
286 if (pvFault < pMapping->GCPtr)
287 break;
288 if (pvFault - pMapping->GCPtr < pMapping->cb)
289 {
290 /*
291 * The first thing we check is if we've got an undetected conflict.
292 */
293 if (!pVM->pgm.s.fMappingsFixed)
294 {
295 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
296 while (iPT-- > 0)
297 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
298 {
299 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
300 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
301 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
302 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
303 return VINF_PGM_SYNC_CR3;
304 }
305 }
306
307 /*
308 * Check if the fault address is in a virtual page access handler range.
309 */
310 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
311 if ( pCur
312 && pvFault - pCur->Core.Key < pCur->cb
313 && uErr & X86_TRAP_PF_RW)
314 {
315# ifdef IN_RC
316 STAM_PROFILE_START(&pCur->Stat, h);
317 pgmUnlock(pVM);
318 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
319 pgmLock(pVM);
320 STAM_PROFILE_STOP(&pCur->Stat, h);
321# else
322 AssertFailed();
323 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
324# endif
325 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
326 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
327 return rc;
328 }
329
330 /*
331 * Pretend we're not here and let the guest handle the trap.
332 */
333 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
334 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
335 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
336 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
337 return VINF_EM_RAW_GUEST_TRAP;
338 }
339 }
340 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
341 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
342# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
343
344 /*
345 * Check if this fault address is flagged for special treatment,
346 * which means we'll have to figure out the physical address and
347 * check flags associated with it.
348 *
349 * ASSUME that we can limit any special access handling to pages
350 * in page tables which the guest believes to be present.
351 */
352 if (PdeSrc.n.u1Present)
353 {
354 RTGCPHYS GCPhys = NIL_RTGCPHYS;
355
356# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
357# if PGM_GST_TYPE == PGM_TYPE_AMD64
358 bool fBigPagesSupported = true;
359# else
360 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
361# endif
362 if ( PdeSrc.b.u1Size
363 && fBigPagesSupported)
364 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
365 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
366 else
367 {
368 PGSTPT pPTSrc;
369 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
370 if (RT_SUCCESS(rc))
371 {
372 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
373 if (pPTSrc->a[iPTESrc].n.u1Present)
374 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
375 }
376 }
377# else
378 /* No paging so the fault address is the physical address */
379 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
380# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
381
382 /*
383 * If we have a GC address we'll check if it has any flags set.
384 */
385 if (GCPhys != NIL_RTGCPHYS)
386 {
387 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
388
389 PPGMPAGE pPage;
390 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
391 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
392 {
393 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
394 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
395 {
396 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
397 {
398 /*
399 * Physical page access handler.
400 */
401 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
402 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
403 if (pCur)
404 {
405# ifdef PGM_SYNC_N_PAGES
406 /*
407 * If the region is write protected and we got a page not present fault, then sync
408 * the pages. If the fault was caused by a read, then restart the instruction.
409 * In case of write access continue to the GC write handler.
410 *
411 * ASSUMES that there is only one handler per page or that they have similar write properties.
412 */
413 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
414 && !(uErr & X86_TRAP_PF_P))
415 {
416 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
417 if ( RT_FAILURE(rc)
418 || !(uErr & X86_TRAP_PF_RW)
419 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
420 {
421 AssertRC(rc);
422 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
423 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
424 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
425 return rc;
426 }
427 }
428# endif
429
430 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
431 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
432 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
433
434# if defined(IN_RC) || defined(IN_RING0)
435 if (pCur->CTX_SUFF(pfnHandler))
436 {
437 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
438# ifdef IN_RING0
439 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
440# else
441 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
442# endif
443 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
444 void *pvUser = pCur->CTX_SUFF(pvUser);
445
446 STAM_PROFILE_START(&pCur->Stat, h);
447 if (fLeaveLock)
448 pgmUnlock(pVM); /* @todo: Not entirely safe. */
449
450 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
451 if (fLeaveLock)
452 pgmLock(pVM);
453# ifdef VBOX_WITH_STATISTICS
454 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
455 if (pCur)
456 STAM_PROFILE_STOP(&pCur->Stat, h);
457# else
458 pCur = NULL; /* might be invalid by now. */
459# endif
460
461 }
462 else
463# endif
464 rc = VINF_EM_RAW_EMULATE_INSTR;
465
466 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
467 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
468 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
469 return rc;
470 }
471 }
472# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
473 else
474 {
475# ifdef PGM_SYNC_N_PAGES
476 /*
477 * If the region is write protected and we got a page not present fault, then sync
478 * the pages. If the fault was caused by a read, then restart the instruction.
479 * In case of write access continue to the GC write handler.
480 */
481 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
482 && !(uErr & X86_TRAP_PF_P))
483 {
484 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
485 if ( RT_FAILURE(rc)
486 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
487 || !(uErr & X86_TRAP_PF_RW))
488 {
489 AssertRC(rc);
490 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
491 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
492 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
493 return rc;
494 }
495 }
496# endif
497 /*
498 * Ok, it's a virtual page access handler.
499 *
500 * Since it's faster to search by address, we'll do that first
501 * and then retry by GCPhys if that fails.
502 */
503 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
504 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
505 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
506 */
507 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
508 if (pCur)
509 {
510 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
511 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
512 || !(uErr & X86_TRAP_PF_P)
513 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
514 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
515
516 if ( pvFault - pCur->Core.Key < pCur->cb
517 && ( uErr & X86_TRAP_PF_RW
518 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
519 {
520# ifdef IN_RC
521 STAM_PROFILE_START(&pCur->Stat, h);
522 pgmUnlock(pVM);
523 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
524 pgmLock(pVM);
525 STAM_PROFILE_STOP(&pCur->Stat, h);
526# else
527 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
528# endif
529 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
530 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
531 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
532 return rc;
533 }
534 /* Unhandled part of a monitored page */
535 }
536 else
537 {
538 /* Check by physical address. */
539 PPGMVIRTHANDLER pCur;
540 unsigned iPage;
541 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
542 &pCur, &iPage);
543 Assert(RT_SUCCESS(rc) || !pCur);
544 if ( pCur
545 && ( uErr & X86_TRAP_PF_RW
546 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
547 {
548 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
549# ifdef IN_RC
550 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
551 Assert(off < pCur->cb);
552 STAM_PROFILE_START(&pCur->Stat, h);
553 pgmUnlock(pVM);
554 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
555 pgmLock(pVM);
556 STAM_PROFILE_STOP(&pCur->Stat, h);
557# else
558 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
559# endif
560 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
561 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
562 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
563 return rc;
564 }
565 }
566 }
567# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
568
569 /*
570 * There is a handled area of the page, but this fault doesn't belong to it.
571 * We must emulate the instruction.
572 *
573 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
574 * we first check if this was a page-not-present fault for a page with only
575 * write access handlers. Restart the instruction if it wasn't a write access.
576 */
577 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
578
579 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
580 && !(uErr & X86_TRAP_PF_P))
581 {
582 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
583 if ( RT_FAILURE(rc)
584 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
585 || !(uErr & X86_TRAP_PF_RW))
586 {
587 AssertRC(rc);
588 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
589 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
590 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
591 return rc;
592 }
593 }
594
595 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06.
596 * It's writing to an unhandled part of the LDT page several million times.
597 */
598 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
599 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
600 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
601 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
602 return rc;
603 } /* if any kind of handler */
604
605# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
606 if (uErr & X86_TRAP_PF_P)
607 {
608 /*
609 * The page isn't marked, but it might still be monitored by a virtual page access handler.
610 * (ASSUMES no temporary disabling of virtual handlers.)
611 */
612 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
613 * we should correct both the shadow page table and physical memory flags, and not only check for
614 * accesses within the handler region but for access to pages with virtual handlers. */
615 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
616 if (pCur)
617 {
618 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
619 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
620 || !(uErr & X86_TRAP_PF_P)
621 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
622 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
623
624 if ( pvFault - pCur->Core.Key < pCur->cb
625 && ( uErr & X86_TRAP_PF_RW
626 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
627 {
628# ifdef IN_RC
629 STAM_PROFILE_START(&pCur->Stat, h);
630 pgmUnlock(pVM);
631 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
632 pgmLock(pVM);
633 STAM_PROFILE_STOP(&pCur->Stat, h);
634# else
635 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
636# endif
637 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
638 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
639 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
640 return rc;
641 }
642 }
643 }
644# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
645 }
646 else
647 {
648 /*
649 * When the guest accesses invalid physical memory (e.g. probing
650 * of RAM or accessing a remapped MMIO range), then we'll fall
651 * back to the recompiler to emulate the instruction.
652 */
653 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
654 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
655 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
656 return VINF_EM_RAW_EMULATE_INSTR;
657 }
658
659 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
660
661# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
662 /*
663 * We are here only if page is present in Guest page tables and
664 * trap is not handled by our handlers.
665 *
666 * Check it for page out-of-sync situation.
667 */
668 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
669
670 if (!(uErr & X86_TRAP_PF_P))
671 {
672 /*
673 * Page is not present in our page tables.
674 * Try to sync it!
675 * BTW, fPageShw is invalid in this branch!
676 */
677 if (uErr & X86_TRAP_PF_US)
678 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
679 else /* supervisor */
680 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
681
682# if defined(LOG_ENABLED) && !defined(IN_RING0)
683 RTGCPHYS GCPhys;
684 uint64_t fPageGst;
685 PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
686 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
687 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
688# endif /* LOG_ENABLED */
689
690# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
691 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
692 {
693 uint64_t fPageGst;
694 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
695 if ( RT_SUCCESS(rc)
696 && !(fPageGst & X86_PTE_US))
697 {
698 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
699 if ( pvFault == (RTGCPTR)pRegFrame->eip
700 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
701# ifdef CSAM_DETECT_NEW_CODE_PAGES
702 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
703 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
704# endif /* CSAM_DETECT_NEW_CODE_PAGES */
705 )
706 {
707 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
708 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
709 if (rc != VINF_SUCCESS)
710 {
711 /*
712 * CSAM needs to perform a job in ring 3.
713 *
714 * Sync the page before going to the host context; otherwise we'll end up in a loop if
715 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
716 */
717 LogFlow(("CSAM ring 3 job\n"));
718 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
719 AssertRC(rc2);
720
721 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
722 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
723 return rc;
724 }
725 }
726# ifdef CSAM_DETECT_NEW_CODE_PAGES
727 else if ( uErr == X86_TRAP_PF_RW
728 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
729 && pRegFrame->ecx < 0x10000)
730 {
731 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
732 * to detect loading of new code pages.
733 */
734
735 /*
736 * Decode the instruction.
737 */
738 RTGCPTR PC;
739 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
740 if (rc == VINF_SUCCESS)
741 {
742 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
743 uint32_t cbOp;
744 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
745
746 /* For now we'll restrict this to rep movsw/d instructions */
747 if ( rc == VINF_SUCCESS
748 && pDis->pCurInstr->opcode == OP_MOVSWD
749 && (pDis->prefix & PREFIX_REP))
750 {
751 CSAMMarkPossibleCodePage(pVM, pvFault);
752 }
753 }
754 }
755# endif /* CSAM_DETECT_NEW_CODE_PAGES */
756
757 /*
758 * Mark this page as safe.
759 */
760 /** @todo not correct for pages that contain both code and data!! */
761 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
762 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
763 }
764 }
765# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
766 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
767 if (RT_SUCCESS(rc))
768 {
769 /* The page was successfully synced, return to the guest. */
770 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
771 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
772 return VINF_SUCCESS;
773 }
774 }
775 else /* uErr & X86_TRAP_PF_P: */
776 {
777 /*
778 * Write protected pages are made writable when the guest makes the first
779 * write to them. This happens for pages that are shared, write monitored
780 * and not yet allocated.
781 *
782 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
783 * to physically monitored regions that are no longer valid.
784 * Assume for now it only applies to the read/write flag.
785 */
786 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
787 {
788 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
789 {
790 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
791 GCPhys, pPage, pvFault, uErr));
792 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
793 if (rc != VINF_SUCCESS)
794 {
795 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
796 return rc;
797 }
798 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
799 return VINF_EM_NO_MEMORY;
800 }
801
802 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
803 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
804 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
805 {
806 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
807 uint64_t fPageGst;
808 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
809 if ( RT_SUCCESS(rc)
810 && !(fPageGst & X86_PTE_RW))
811 {
812 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
813 if (RT_SUCCESS(rc))
814 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
815 else
816 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
817 return rc;
818 }
819 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
820 }
821
822 /// @todo count the above case; else
823 if (uErr & X86_TRAP_PF_US)
824 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
825 else /* supervisor */
826 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
827
828 /*
829 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
830 * page is not present, which is not true in this case.
831 */
832 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
833 if (RT_SUCCESS(rc))
834 {
835 /*
836 * Page was successfully synced, return to guest.
837 */
838# ifdef VBOX_STRICT
839 RTGCPHYS GCPhys;
840 uint64_t fPageGst;
841 if (!HWACCMIsNestedPagingActive(pVM))
842 {
843 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
844 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
845 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
846 }
847 uint64_t fPageShw;
848 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
849 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCPUs > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
850# endif /* VBOX_STRICT */
851 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
852 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
853 return VINF_SUCCESS;
854 }
855 }
856
857# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
858# ifdef VBOX_STRICT
859 /*
860 * Check for VMM page flags vs. Guest page flags consistency.
861 * Currently only for debug purposes.
862 */
863 if (RT_SUCCESS(rc))
864 {
865 /* Get guest page flags. */
866 uint64_t fPageGst;
867 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
868 if (RT_SUCCESS(rc))
869 {
870 uint64_t fPageShw;
871 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
872
873 /*
874 * Compare page flags.
875 * Note: we have AVL, A, D bits desynched.
876 */
877 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
878 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
879 }
880 else
881 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
882 }
883 else
884 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
885# endif /* VBOX_STRICT */
886# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
887 }
888 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
889# endif /* PGM_OUT_OF_SYNC_IN_GC */
890 }
891 else /* GCPhys == NIL_RTGCPHYS */
892 {
893 /*
894 * Page not present in Guest OS or invalid page table address.
895 * This is potential virtual page access handler food.
896 *
897 * For the present we'll say that our access handlers don't
898 * work for this case - we've already discarded the page table
899 * not present case which is identical to this.
900 *
901 * When we perchance find we need this, we will probably have AVL
902 * trees (offset based) to operate on and we can measure their speed
903 * against mapping a page table and probably rearrange this handling
904 * a bit. (Like, searching virtual ranges before checking the
905 * physical address.)
906 */
907 }
908 }
909 /* else: !present (guest) */
910
911
912# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
913 /*
914 * Conclusion, this is a guest trap.
915 */
916 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
917 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
918 return VINF_EM_RAW_GUEST_TRAP;
919# else
920 /* present, but not a monitored page; perhaps the guest is probing physical memory */
921 return VINF_EM_RAW_EMULATE_INSTR;
922# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
923
924
925# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
926
927 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
928 return VERR_INTERNAL_ERROR;
929# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
930}
931#endif /* !IN_RING3 */
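/*
 * For context: an instantiation of the Trap0eHandler above is reached from
 * the generic #PF entry point, which dispatches through the per-mode
 * function pointers set up when the paging mode changes.  The fragment
 * below is only an illustrative sketch of that call shape, not the actual
 * VirtualBox code (the real dispatch, locking and statistics live elsewhere
 * in the PGM sources):
 *
 *     VMMDECL(int) PGMTrap0eHandler(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
 *     {
 *         pgmLock(pVCpu->CTX_SUFF(pVM));
 *         int rc = PGM_BTH_PFN(Trap0eHandler, pVCpu)(pVCpu, uErr, pRegFrame, pvFault);
 *         pgmUnlock(pVCpu->CTX_SUFF(pVM));
 *         return rc;
 *     }
 */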
932
933
934/**
935 * Emulation of the invlpg instruction.
936 *
937 *
938 * @returns VBox status code.
939 *
940 * @param pVCpu The VMCPU handle.
941 * @param GCPtrPage Page to invalidate.
942 *
943 * @remark ASSUMES that the guest is updating before invalidating. This order
944 * isn't required by the CPU, so this is speculative and could cause
945 * trouble.
946 * @remark No TLB shootdown is done on any other VCPU as we assume that
947 * invlpg emulation is the *only* reason for calling this function.
948 * (The guest has to shoot down TLB entries on other CPUs itself)
949 * Currently true, but keep in mind!
950 *
951 * @todo Flush page or page directory only if necessary!
952 * @todo Add a #define for simply invalidating the page.
953 */
954PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
955{
956#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
957 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
958 && PGM_SHW_TYPE != PGM_TYPE_EPT
959 int rc;
960 PVM pVM = pVCpu->CTX_SUFF(pVM);
961 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
962
963 Assert(PGMIsLockOwner(pVM));
964
965 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
966 /*
967 * Get the shadow PD entry and skip out if this PD isn't present.
968 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
969 */
970# if PGM_SHW_TYPE == PGM_TYPE_32BIT
971 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
972 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
973
974 /* Fetch the pgm pool shadow descriptor. */
975 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
976 Assert(pShwPde);
977
978# elif PGM_SHW_TYPE == PGM_TYPE_PAE
979 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
980 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
981
982 /* If the shadow PDPE isn't present, then skip the invalidate. */
983 if (!pPdptDst->a[iPdpt].n.u1Present)
984 {
985 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
986 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
987 return VINF_SUCCESS;
988 }
989
990 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
991 PPGMPOOLPAGE pShwPde = NULL;
992 PX86PDPAE pPDDst;
993
994 /* Fetch the pgm pool shadow descriptor. */
995 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
996 AssertRCSuccessReturn(rc, rc);
997 Assert(pShwPde);
998
999 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1000 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1001
1002# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1003 /* PML4 */
1004 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1005 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1006 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1007 PX86PDPAE pPDDst;
1008 PX86PDPT pPdptDst;
1009 PX86PML4E pPml4eDst;
1010 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1011 if (rc != VINF_SUCCESS)
1012 {
1013 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1014 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1015 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
1016 PGM_INVL_VCPU_TLBS(pVCpu);
1017 return VINF_SUCCESS;
1018 }
1019 Assert(pPDDst);
1020
1021 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1022 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1023
1024 if (!pPdpeDst->n.u1Present)
1025 {
1026 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1027 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
1028 PGM_INVL_VCPU_TLBS(pVCpu);
1029 return VINF_SUCCESS;
1030 }
1031
1032# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1033
1034 const SHWPDE PdeDst = *pPdeDst;
1035 if (!PdeDst.n.u1Present)
1036 {
1037 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1038 return VINF_SUCCESS;
1039 }
1040
1041# if defined(IN_RC)
1042 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1043 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1044# endif
1045
1046 /*
1047 * Get the guest PD entry and calc big page.
1048 */
1049# if PGM_GST_TYPE == PGM_TYPE_32BIT
1050 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1051 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1052 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1053# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1054 unsigned iPDSrc = 0;
1055# if PGM_GST_TYPE == PGM_TYPE_PAE
1056 X86PDPE PdpeSrc;
1057 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1058# else /* AMD64 */
1059 PX86PML4E pPml4eSrc;
1060 X86PDPE PdpeSrc;
1061 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1062# endif
1063 GSTPDE PdeSrc;
1064
1065 if (pPDSrc)
1066 PdeSrc = pPDSrc->a[iPDSrc];
1067 else
1068 PdeSrc.u = 0;
1069# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1070
1071# if PGM_GST_TYPE == PGM_TYPE_AMD64
1072 const bool fIsBigPage = PdeSrc.b.u1Size;
1073# else
1074 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1075# endif
1076
1077# ifdef IN_RING3
1078 /*
1079 * If a CR3 Sync is pending we may ignore the invalidate page operation
1080 * depending on the kind of sync and whether it's a global page or not.
1081 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1082 */
1083# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1084 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1085 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1086 && fIsBigPage
1087 && PdeSrc.b.u1Global
1088 )
1089 )
1090# else
1091 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1092# endif
1093 {
1094 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1095 return VINF_SUCCESS;
1096 }
1097# endif /* IN_RING3 */
1098
1099# if PGM_GST_TYPE == PGM_TYPE_AMD64
1100 /* Fetch the pgm pool shadow descriptor. */
1101 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
1102 Assert(pShwPdpt);
1103
1104 /* Fetch the pgm pool shadow descriptor. */
1105 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1106 Assert(pShwPde);
1107
1108 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1109 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1110
1111 if ( !pPml4eSrc->n.u1Present
1112 || pShwPdpt->GCPhys != GCPhysPdpt)
1113 {
1114 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1115 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1116 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1117 ASMAtomicWriteSize(pPml4eDst, 0);
1118 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1119 PGM_INVL_VCPU_TLBS(pVCpu);
1120 return VINF_SUCCESS;
1121 }
1122 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1123 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1124 {
1125 /*
1126 * Mark not present so we can resync the PML4E when it's used.
1127 */
1128 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1129 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1130 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1131 ASMAtomicWriteSize(pPml4eDst, 0);
1132 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1133 PGM_INVL_VCPU_TLBS(pVCpu);
1134 }
1135 else if (!pPml4eSrc->n.u1Accessed)
1136 {
1137 /*
1138 * Mark not present so we can set the accessed bit.
1139 */
1140 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1141 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1142 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1143 ASMAtomicWriteSize(pPml4eDst, 0);
1144 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1145 PGM_INVL_VCPU_TLBS(pVCpu);
1146 }
1147
1148 /* Check if the PDPT entry has changed. */
1149 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1150 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1151 if ( !PdpeSrc.n.u1Present
1152 || pShwPde->GCPhys != GCPhysPd)
1153 {
1154 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1155 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1156 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1157 ASMAtomicWriteSize(pPdpeDst, 0);
1158 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1159 PGM_INVL_VCPU_TLBS(pVCpu);
1160 return VINF_SUCCESS;
1161 }
1162 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1163 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1164 {
1165 /*
1166 * Mark not present so we can resync the PDPTE when it's used.
1167 */
1168 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1169 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1170 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1171 ASMAtomicWriteSize(pPdpeDst, 0);
1172 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1173 PGM_INVL_VCPU_TLBS(pVCpu);
1174 }
1175 else if (!PdpeSrc.lm.u1Accessed)
1176 {
1177 /*
1178 * Mark not present so we can set the accessed bit.
1179 */
1180 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1181 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1182 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1183 ASMAtomicWriteSize(pPdpeDst, 0);
1184 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1185 PGM_INVL_VCPU_TLBS(pVCpu);
1186 }
1187# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1188
1189 /*
1190 * Deal with the Guest PDE.
1191 */
1192 rc = VINF_SUCCESS;
1193 if (PdeSrc.n.u1Present)
1194 {
1195# ifndef PGM_WITHOUT_MAPPING
1196 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1197 {
1198 /*
1199 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1200 */
1201 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1202 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1203 pgmLock(pVM);
1204 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1205 pgmUnlock(pVM);
1206 }
1207 else
1208# endif /* !PGM_WITHOUT_MAPPING */
1209 if ( PdeSrc.n.u1User != PdeDst.n.u1User
1210 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1211 {
1212 /*
1213 * Mark not present so we can resync the PDE when it's used.
1214 */
1215 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1216 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1217 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1218 ASMAtomicWriteSize(pPdeDst, 0);
1219 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1220 PGM_INVL_VCPU_TLBS(pVCpu);
1221 }
1222 else if (!PdeSrc.n.u1Accessed)
1223 {
1224 /*
1225 * Mark not present so we can set the accessed bit.
1226 */
1227 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1228 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1229 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1230 ASMAtomicWriteSize(pPdeDst, 0);
1231 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1232 PGM_INVL_VCPU_TLBS(pVCpu);
1233 }
1234 else if (!fIsBigPage)
1235 {
1236 /*
1237 * 4KB - page.
1238 */
1239 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1240 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1241# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1242 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1243 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1244# endif
1245 if (pShwPage->GCPhys == GCPhys)
1246 {
1247# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1248 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1249 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1250 if (pPT->a[iPTEDst].n.u1Present)
1251 {
1252# ifdef PGMPOOL_WITH_USER_TRACKING
1253 /* This is very unlikely with caching/monitoring enabled. */
1254 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1255# endif
1256 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1257 }
1258# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1259 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1260 if (RT_SUCCESS(rc))
1261 rc = VINF_SUCCESS;
1262# endif
1263 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1264 PGM_INVL_PG(pVCpu, GCPtrPage);
1265 }
1266 else
1267 {
1268 /*
1269 * The page table address changed.
1270 */
1271 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1272 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1273 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1274 ASMAtomicWriteSize(pPdeDst, 0);
1275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1276 PGM_INVL_VCPU_TLBS(pVCpu);
1277 }
1278 }
1279 else
1280 {
1281 /*
1282 * 2/4MB - page.
1283 */
1284 /* Before freeing the page, check if anything really changed. */
1285 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1286 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1287# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1288 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1289 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1290# endif
1291 if ( pShwPage->GCPhys == GCPhys
1292 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1293 {
1294 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1295 /** @todo PAT */
1296 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1297 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1298 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1299 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1300 {
1301 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1302 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1303# if defined(IN_RC)
1304 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1305 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1306# endif
1307 return VINF_SUCCESS;
1308 }
1309 }
1310
1311 /*
1312 * Ok, the page table is present and it's been changed in the guest.
1313 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1314 * We could do this for some flushes in GC too, but we need an algorithm for
1315 * deciding which 4MB pages contain code likely to be executed very soon.
1316 */
1317 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1318 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1319 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1320 ASMAtomicWriteSize(pPdeDst, 0);
1321 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1322 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1323 }
1324 }
1325 else
1326 {
1327 /*
1328 * Page directory is not present, mark shadow PDE not present.
1329 */
1330 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1331 {
1332 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1333 ASMAtomicWriteSize(pPdeDst, 0);
1334 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1335 PGM_INVL_PG(pVCpu, GCPtrPage);
1336 }
1337 else
1338 {
1339 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1340 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1341 }
1342 }
1343# if defined(IN_RC)
1344 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1345 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1346# endif
1347 return rc;
1348
1349#else /* guest real and protected mode */
1350 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1351 return VINF_SUCCESS;
1352#endif
1353}
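/*
 * For context: guest invlpg emulation reaches the InvalidatePage template
 * above through the generic PGMInvalidatePage(pVCpu, GCPtrPage) entry point,
 * which dispatches to the instantiation matching the current guest+shadow
 * mode.  Illustrative call shape only (the actual plumbing lives in the
 * mode-independent PGM code):
 *
 *     rc = PGMInvalidatePage(pVCpu, GCPtrPage);
 *     //   -> PGM_BTH_PFN(InvalidatePage, pVCpu)(pVCpu, GCPtrPage)
 */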
1354
1355
1356#ifdef PGMPOOL_WITH_USER_TRACKING
1357/**
1358 * Update the tracking of shadowed pages.
1359 *
1360 * @param pVCpu The VMCPU handle.
1361 * @param pShwPage The shadow page.
1362 * @param HCPhys The physical page that is being dereferenced.
1363 */
1364DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1365{
1366# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1367 PVM pVM = pVCpu->CTX_SUFF(pVM);
1368
1369 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1370 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1371
1372 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1373 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1374 * 2. write protect all shadowed pages. I.e. implement caching.
1375 */
1376 /*
1377 * Find the guest address.
1378 */
1379 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1380 pRam;
1381 pRam = pRam->CTX_SUFF(pNext))
1382 {
1383 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1384 while (iPage-- > 0)
1385 {
1386 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1387 {
1388 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1389 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1390 pShwPage->cPresent--;
1391 pPool->cPresent--;
1392 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1393 return;
1394 }
1395 }
1396 }
1397
1398 for (;;)
1399 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1400# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1401 pShwPage->cPresent--;
1402 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1403# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1404}
1405
1406
1407/**
1408 * Update the tracking of shadowed pages.
1409 *
1410 * @param pVCpu The VMCPU handle.
1411 * @param pShwPage The shadow page.
1412 * @param u16 The top 16 bits of the pPage->HCPhys.
1413 * @param pPage Pointer to the guest page. This will be modified.
1414 * @param iPTDst The index into the shadow table.
1415 */
1416DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1417{
1418 PVM pVM = pVCpu->CTX_SUFF(pVM);
1419# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1420 /*
1421 * Just deal with the simple first time here.
1422 */
1423 if (!u16)
1424 {
1425 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1426 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1427 }
1428 else
1429 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1430
1431 /* write back */
1432 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1433 PGM_PAGE_SET_TRACKING(pPage, u16);
1434
1435# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1436
1437 /* update statistics. */
1438 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1439 pShwPage->cPresent++;
1440 if (pShwPage->iFirstPresent > iPTDst)
1441 pShwPage->iFirstPresent = iPTDst;
1442}
1443#endif /* PGMPOOL_WITH_USER_TRACKING */
1444
1445
1446/**
1447 * Creates a 4K shadow page for a guest page.
1448 *
1449 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1450 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1451 * will be mapped in this function.
1452 *
1453 * @param pVCpu The VMCPU handle.
1454 * @param pPteDst Destination page table entry.
1455 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1456 * Can safely assume that only the flags are being used.
1457 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1458 * @param pShwPage Pointer to the shadow page.
1459 * @param iPTDst The index into the shadow table.
1460 *
1461 * @remark Not used for 2/4MB pages!
1462 */
1463DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1464{
1465 if (PteSrc.n.u1Present)
1466 {
1467 PVM pVM = pVCpu->CTX_SUFF(pVM);
1468
1469 /*
1470 * Find the ram range.
1471 */
1472 PPGMPAGE pPage;
1473 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1474 if (RT_SUCCESS(rc))
1475 {
1476#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1477 /* Try make the page writable if necessary. */
1478 if ( PteSrc.n.u1Write
1479 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1480 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1481 {
1482 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1483 AssertRC(rc);
1484 }
1485#endif
1486
1487 /** @todo investigate PWT, PCD and PAT. */
1488 /*
1489 * Make page table entry.
1490 */
1491 SHWPTE PteDst;
1492 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1493 {
1494 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1495 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1496 {
1497#if PGM_SHW_TYPE == PGM_TYPE_EPT
1498 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1499 PteDst.n.u1Present = 1;
1500 PteDst.n.u1Execute = 1;
1501 PteDst.n.u1IgnorePAT = 1;
1502 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1503 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1504#else
1505 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1506 | PGM_PAGE_GET_HCPHYS(pPage);
1507#endif
1508 }
1509 else
1510 {
1511 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1512 PteDst.u = 0;
1513 }
1514 /** @todo count these two kinds. */
1515 }
1516 else
1517 {
1518#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1519 /*
1520 * If the page or page directory entry is not marked accessed,
1521 * we mark the page not present.
1522 */
1523 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1524 {
1525 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1526 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1527 PteDst.u = 0;
1528 }
1529 else
1530 /*
1531 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1532 * when the page is modified.
1533 */
1534 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1535 {
1536 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1537 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1538 | PGM_PAGE_GET_HCPHYS(pPage)
1539 | PGM_PTFLAGS_TRACK_DIRTY;
1540 }
1541 else
1542#endif
1543 {
1544 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1545#if PGM_SHW_TYPE == PGM_TYPE_EPT
1546 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1547 PteDst.n.u1Present = 1;
1548 PteDst.n.u1Write = 1;
1549 PteDst.n.u1Execute = 1;
1550 PteDst.n.u1IgnorePAT = 1;
1551 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1552 /* PteDst.n.u1Size = 0 */
1553#else
1554 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1555 | PGM_PAGE_GET_HCPHYS(pPage);
1556#endif
1557 }
1558 }
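        /* Descriptive note: a PTE marked with PGM_PTFLAGS_TRACK_DIRTY above is deliberately
         * left read-only. When the guest later writes to it, CheckPageFault() further down
         * in this file sets the guest PTE's dirty bit and then undoes the trap roughly like
         * this, so the guest sees architecturally correct A/D behaviour:
         * @code
         *     pPteDst->n.u1Write     = 1;
         *     pPteDst->n.u1Dirty     = 1;
         *     pPteDst->n.u1Accessed  = 1;
         *     pPteDst->au32[0]      &= ~PGM_PTFLAGS_TRACK_DIRTY;
         * @endcode
         */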
1559
1560 /*
1561 * Make sure only allocated pages are mapped writable.
1562 */
1563 if ( PteDst.n.u1Write
1564 && PteDst.n.u1Present
1565 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1566 {
1567 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1568 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1569 }
1570
1571#ifdef PGMPOOL_WITH_USER_TRACKING
1572 /*
1573 * Keep user track up to date.
1574 */
1575 if (PteDst.n.u1Present)
1576 {
1577 if (!pPteDst->n.u1Present)
1578 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1579 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1580 {
1581 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1582 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1583 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1584 }
1585 }
1586 else if (pPteDst->n.u1Present)
1587 {
1588 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1589 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1590 }
1591#endif /* PGMPOOL_WITH_USER_TRACKING */
1592
1593 /*
1594 * Update statistics and commit the entry.
1595 */
1596#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1597 if (!PteSrc.n.u1Global)
1598 pShwPage->fSeenNonGlobal = true;
1599#endif
1600 ASMAtomicWriteSize(pPteDst, PteDst.u);
1601 }
1602 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1603 /** @todo count these. */
1604 }
1605 else
1606 {
1607 /*
1608 * Page not-present.
1609 */
1610 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1611#ifdef PGMPOOL_WITH_USER_TRACKING
1612 /* Keep user track up to date. */
1613 if (pPteDst->n.u1Present)
1614 {
1615 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1616 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1617 }
1618#endif /* PGMPOOL_WITH_USER_TRACKING */
1619 ASMAtomicWriteSize(pPteDst, 0);
1620 /** @todo count these. */
1621 }
1622}
1623
1624
1625/**
1626 * Syncs a guest OS page.
1627 *
1628 * There are no conflicts at this point, neither is there any need for
1629 * page table allocations.
1630 *
1631 * @returns VBox status code.
1632 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1633 * @param pVCpu The VMCPU handle.
1634 * @param PdeSrc Page directory entry of the guest.
1635 * @param GCPtrPage Guest context page address.
1636 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1637 * @param uErr Fault error (X86_TRAP_PF_*).
1638 */
1639PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1640{
1641 PVM pVM = pVCpu->CTX_SUFF(pVM);
1642 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1643 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1644
1645 Assert(PGMIsLockOwner(pVM));
1646
1647#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1648 || PGM_GST_TYPE == PGM_TYPE_PAE \
1649 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1650 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1651 && PGM_SHW_TYPE != PGM_TYPE_EPT
1652
1653# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1654 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
1655# endif
1656
1657 /*
1658 * Assert preconditions.
1659 */
1660 Assert(PdeSrc.n.u1Present);
1661 Assert(cPages);
1662 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1663
1664 /*
1665 * Get the shadow PDE, find the shadow page table in the pool.
1666 */
1667# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1668 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1669 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1670
1671 /* Fetch the pgm pool shadow descriptor. */
1672 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1673 Assert(pShwPde);
1674
1675# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1676 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1677 PPGMPOOLPAGE pShwPde = NULL;
1678 PX86PDPAE pPDDst;
1679
1680 /* Fetch the pgm pool shadow descriptor. */
1681 int rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1682 AssertRCSuccessReturn(rc, rc);
1683 Assert(pShwPde);
1684
1685 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1686 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1687
1688# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1689 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1690 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1691 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1692 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1693
1694 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1695 AssertRCSuccessReturn(rc, rc);
1696 Assert(pPDDst && pPdptDst);
1697 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1698# endif
1699 SHWPDE PdeDst = *pPdeDst;
1700 if (!PdeDst.n.u1Present)
1701 {
1702 AssertMsg(pVM->cCPUs > 1, ("Unexpected missing PDE %p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1703 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1704 return VINF_SUCCESS; /* force the instruction to be executed again. */
1705 }
1706
1707 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1708
1709# if PGM_GST_TYPE == PGM_TYPE_AMD64
1710 /* Fetch the pgm pool shadow descriptor. */
1711 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1712 Assert(pShwPde);
1713# endif
1714
1715# if defined(IN_RC)
1716 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1717 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1718# endif
1719
1720 /*
1721 * Check that the page is present and that the shadow PDE isn't out of sync.
1722 */
1723# if PGM_GST_TYPE == PGM_TYPE_AMD64
1724 const bool fBigPage = PdeSrc.b.u1Size;
1725# else
1726 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1727# endif
1728 RTGCPHYS GCPhys;
1729 if (!fBigPage)
1730 {
1731 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1732# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1733 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1734 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1735# endif
1736 }
1737 else
1738 {
1739 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1740# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1741 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1742 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1743# endif
1744 }
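    /* Worked example for the 32-bit-guest-on-PAE-shadow adjustments above: a 32-bit guest
     * page table has 1024 entries covering 4 MB, while a PAE shadow page table has only
     * 512 entries covering 2 MB, so each guest PT/4M PDE is shadowed by two shadow PTs:
     * @code
     *     // iPDDst even -> first  half of the guest PT (entries   0..511 )
     *     // iPDDst odd  -> second half of the guest PT (entries 512..1023)
     *     GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);       // 4K case: byte offset into the guest PT
     *     GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);  // 4M case: first or second 2 MB half
     * @endcode
     * Both lines merely restate the #if'd statements above with the two cases side by side.
     */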
1745 if ( pShwPage->GCPhys == GCPhys
1746 && PdeSrc.n.u1Present
1747 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1748 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1749# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1750 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1751# endif
1752 )
1753 {
1754 /*
1755 * Check that the PDE is marked accessed already.
1756 * Since we set the accessed bit *before* getting here on a #PF, this
1757 * check is only meant for dealing with non-#PF'ing paths.
1758 */
1759 if (PdeSrc.n.u1Accessed)
1760 {
1761 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1762 if (!fBigPage)
1763 {
1764 /*
1765 * 4KB Page - Map the guest page table.
1766 */
1767 PGSTPT pPTSrc;
1768 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1769 if (RT_SUCCESS(rc))
1770 {
1771# ifdef PGM_SYNC_N_PAGES
1772 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1773 if ( cPages > 1
1774 && !(uErr & X86_TRAP_PF_P)
1775 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1776 {
1777 /*
1778 * This code path is currently only taken when the caller is PGMTrap0eHandler
1779 * for non-present pages!
1780 *
1781 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1782 * deal with locality.
1783 */
1784 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1785# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1786 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1787 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1788# else
1789 const unsigned offPTSrc = 0;
1790# endif
1791 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1792 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1793 iPTDst = 0;
1794 else
1795 iPTDst -= PGM_SYNC_NR_PAGES / 2;
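                    /* Illustrative numbers for the window above (assuming, for illustration,
                     * PGM_SYNC_NR_PAGES == 8 and a 512-entry shadow page table):
                     * @code
                     *     // faulting entry iPTDst = 3:   end = min(3 + 4, 512)   = 7,   start clamped to 0 -> sync entries  0..6
                     *     // faulting entry iPTDst = 100: end = min(100 + 4, 512) = 104, start = 100 - 4    -> sync entries 96..103
                     * @endcode
                     * i.e. a small window of not-present entries around the faulting page is synced in one go.
                     */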
1796 for (; iPTDst < iPTDstEnd; iPTDst++)
1797 {
1798 if (!pPTDst->a[iPTDst].n.u1Present)
1799 {
1800 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1801 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1802 NOREF(GCPtrCurPage);
1803#ifndef IN_RING0
1804 /*
1805 * Assuming kernel code will be marked as supervisor - and not as user level
1806 * and executed using a conforming code selector - and marked as readonly.
1807 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1808 */
1809 PPGMPAGE pPage;
1810 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1811 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1812 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1813 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1814 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1815 )
1816#endif /* else: CSAM not active */
1817 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1818 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1819 GCPtrCurPage, PteSrc.n.u1Present,
1820 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1821 PteSrc.n.u1User & PdeSrc.n.u1User,
1822 (uint64_t)PteSrc.u,
1823 (uint64_t)pPTDst->a[iPTDst].u,
1824 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1825 }
1826 }
1827 }
1828 else
1829# endif /* PGM_SYNC_N_PAGES */
1830 {
1831 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1832 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1833 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1834 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1835 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1836 GCPtrPage, PteSrc.n.u1Present,
1837 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1838 PteSrc.n.u1User & PdeSrc.n.u1User,
1839 (uint64_t)PteSrc.u,
1840 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1841 }
1842 }
1843 else /* MMIO or invalid page: emulated in #PF handler. */
1844 {
1845 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1846 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1847 }
1848 }
1849 else
1850 {
1851 /*
1852 * 4/2MB page - lazy syncing shadow 4K pages.
1853 * (There are many causes of getting here, it's no longer only CSAM.)
1854 */
1855 /* Calculate the GC physical address of this 4KB shadow page. */
1856 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1857 /* Find ram range. */
1858 PPGMPAGE pPage;
1859 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1860 if (RT_SUCCESS(rc))
1861 {
1862# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1863 /* Try make the page writable if necessary. */
1864 if ( PdeSrc.n.u1Write
1865 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1866 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1867 {
1868 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1869 AssertRC(rc);
1870 }
1871# endif
1872
1873 /*
1874 * Make shadow PTE entry.
1875 */
1876 SHWPTE PteDst;
1877 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1878 | PGM_PAGE_GET_HCPHYS(pPage);
1879 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1880 {
1881 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1882 PteDst.n.u1Write = 0;
1883 else
1884 PteDst.u = 0;
1885 }
1886 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1887# ifdef PGMPOOL_WITH_USER_TRACKING
1888 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1889 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1890# endif
1891 /* Make sure only allocated pages are mapped writable. */
1892 if ( PteDst.n.u1Write
1893 && PteDst.n.u1Present
1894 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1895 {
1896 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1897 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1898 }
1899
1900 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1901
1902 /*
1903 * If the page is not flagged as dirty and is writable, then make it read-only
1904 * at PD level, so we can set the dirty bit when the page is modified.
1905 *
1906 * ASSUMES that page access handlers are implemented on page table entry level.
1907 * Thus we will first catch the dirty access and set PDE.D and restart. If
1908 * there is an access handler, we'll trap again and let it work on the problem.
1909 */
1910 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1911 * As for invlpg, it simply frees the whole shadow PT.
1912 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1913 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1914 {
1915 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1916 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1917 PdeDst.n.u1Write = 0;
1918 }
1919 else
1920 {
1921 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1922 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1923 }
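                /* Descriptive note: the PGM_PDFLAGS_TRACK_DIRTY / read-only PDE written below is
                 * the 2/4 MB counterpart of the PTE-level dirty tracking used for 4K pages; the
                 * big-page branch of CheckPageFault() restores PdeDst write access and clears the
                 * flag once the guest PDE's dirty bit has been set. */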
1924 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1925 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1926 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1927 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1928 }
1929 else
1930 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1931 }
1932# if defined(IN_RC)
1933 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1934 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1935# endif
1936 return VINF_SUCCESS;
1937 }
1938 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1939 }
1940 else
1941 {
1942 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1943 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1944 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1945 }
1946
1947 /*
1948 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1949 * Yea, I'm lazy.
1950 */
1951 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1952 ASMAtomicWriteSize(pPdeDst, 0);
1953
1954# if defined(IN_RC)
1955 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1956 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1957# endif
1958 PGM_INVL_VCPU_TLBS(pVCpu);
1959 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1960
1961#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1962 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1963 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1964 && !defined(IN_RC)
1965
1966# ifdef PGM_SYNC_N_PAGES
1967 /*
1968 * Get the shadow PDE, find the shadow page table in the pool.
1969 */
1970# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1971 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1972
1973# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1974 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1975
1976# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1977 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1978 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1979 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1980 X86PDEPAE PdeDst;
1981 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1982
1983 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1984 AssertRCSuccessReturn(rc, rc);
1985 Assert(pPDDst && pPdptDst);
1986 PdeDst = pPDDst->a[iPDDst];
1987# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1988 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1989 PEPTPD pPDDst;
1990 EPTPDE PdeDst;
1991
1992 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1993 if (rc != VINF_SUCCESS)
1994 {
1995 AssertRC(rc);
1996 return rc;
1997 }
1998 Assert(pPDDst);
1999 PdeDst = pPDDst->a[iPDDst];
2000# endif
2001 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
2002 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2003 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2004
2005 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2006 if ( cPages > 1
2007 && !(uErr & X86_TRAP_PF_P)
2008 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2009 {
2010 /*
2011 * This code path is currently only taken when the caller is PGMTrap0eHandler
2012 * for non-present pages!
2013 *
2014 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2015 * deal with locality.
2016 */
2017 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2018 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2019 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2020 iPTDst = 0;
2021 else
2022 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2023 for (; iPTDst < iPTDstEnd; iPTDst++)
2024 {
2025 if (!pPTDst->a[iPTDst].n.u1Present)
2026 {
2027 GSTPTE PteSrc;
2028
2029 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2030
2031 /* Fake the page table entry */
2032 PteSrc.u = GCPtrCurPage;
2033 PteSrc.n.u1Present = 1;
2034 PteSrc.n.u1Dirty = 1;
2035 PteSrc.n.u1Accessed = 1;
2036 PteSrc.n.u1Write = 1;
2037 PteSrc.n.u1User = 1;
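                /* Descriptive note: with a real-mode or protected-mode-without-paging guest there
                 * is no guest page table to read and guest-virtual equals guest-physical, so the
                 * 'guest' PTE is simply faked as an identity mapping with full access. */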
2038
2039 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2040
2041 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2042 GCPtrCurPage, PteSrc.n.u1Present,
2043 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2044 PteSrc.n.u1User & PdeSrc.n.u1User,
2045 (uint64_t)PteSrc.u,
2046 (uint64_t)pPTDst->a[iPTDst].u,
2047 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2048
2049 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2050 break;
2051 }
2052 else
2053 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2054 }
2055 }
2056 else
2057# endif /* PGM_SYNC_N_PAGES */
2058 {
2059 GSTPTE PteSrc;
2060 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2061 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2062
2063 /* Fake the page table entry */
2064 PteSrc.u = GCPtrCurPage;
2065 PteSrc.n.u1Present = 1;
2066 PteSrc.n.u1Dirty = 1;
2067 PteSrc.n.u1Accessed = 1;
2068 PteSrc.n.u1Write = 1;
2069 PteSrc.n.u1User = 1;
2070 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2071
2072 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2073 GCPtrPage, PteSrc.n.u1Present,
2074 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2075 PteSrc.n.u1User & PdeSrc.n.u1User,
2076 (uint64_t)PteSrc.u,
2077 (uint64_t)pPTDst->a[iPTDst].u,
2078 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2079 }
2080 return VINF_SUCCESS;
2081
2082#else
2083 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2084 return VERR_INTERNAL_ERROR;
2085#endif
2086}
2087
2088
2089#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2090/**
2091 * Investigate page fault and handle write protection page faults caused by
2092 * dirty bit tracking.
2093 *
2094 * @returns VBox status code.
2095 * @param pVCpu The VMCPU handle.
2096 * @param uErr Page fault error code.
2097 * @param pPdeDst Shadow page directory entry.
2098 * @param pPdeSrc Guest page directory entry.
2099 * @param GCPtrPage Guest context page address.
2100 */
2101PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2102{
2103 bool fWriteProtect = !!(CPUMGetGuestCR0(pVCpu) & X86_CR0_WP);
2104 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2105 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2106# if PGM_GST_TYPE == PGM_TYPE_AMD64
2107 bool fBigPagesSupported = true;
2108# else
2109 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2110# endif
2111# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2112 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
2113# endif
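    /* For reference (descriptive note): the standard x86 #PF error code bits tested via the
     * X86_TRAP_PF_* flags in this function are:
     *   bit 0  P    - set for protection violations, clear for not-present pages
     *   bit 1  RW   - set for write accesses
     *   bit 2  US   - set for user-mode accesses
     *   bit 3  RSVD - a reserved bit was set in a paging structure
     *   bit 4  ID   - instruction fetch (requires NX, i.e. EFER.NXE)
     */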
2114 unsigned uPageFaultLevel;
2115 int rc;
2116 PVM pVM = pVCpu->CTX_SUFF(pVM);
2117 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2118
2119 Assert(PGMIsLockOwner(pVM));
2120
2121 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2122 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2123
2124# if PGM_GST_TYPE == PGM_TYPE_PAE \
2125 || PGM_GST_TYPE == PGM_TYPE_AMD64
2126
2127# if PGM_GST_TYPE == PGM_TYPE_AMD64
2128 PX86PML4E pPml4eSrc;
2129 PX86PDPE pPdpeSrc;
2130
2131 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2132 Assert(pPml4eSrc);
2133
2134 /*
2135 * Real page fault? (PML4E level)
2136 */
2137 if ( (uErr & X86_TRAP_PF_RSVD)
2138 || !pPml4eSrc->n.u1Present
2139 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2140 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2141 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2142 )
2143 {
2144 uPageFaultLevel = 0;
2145 goto l_UpperLevelPageFault;
2146 }
2147 Assert(pPdpeSrc);
2148
2149# else /* PAE */
2150 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2151# endif /* PAE */
2152
2153 /*
2154 * Real page fault? (PDPE level)
2155 */
2156 if ( (uErr & X86_TRAP_PF_RSVD)
2157 || !pPdpeSrc->n.u1Present
2158# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2159 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2160 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2161 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2162# endif
2163 )
2164 {
2165 uPageFaultLevel = 1;
2166 goto l_UpperLevelPageFault;
2167 }
2168# endif
2169
2170 /*
2171 * Real page fault? (PDE level)
2172 */
2173 if ( (uErr & X86_TRAP_PF_RSVD)
2174 || !pPdeSrc->n.u1Present
2175# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2176 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2177# endif
2178 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2179 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2180 {
2181 uPageFaultLevel = 2;
2182 goto l_UpperLevelPageFault;
2183 }
2184
2185 /*
2186 * First check the easy case where the page directory has been marked read-only to track
2187 * the dirty bit of an emulated BIG page
2188 */
2189 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2190 {
2191 /* Mark guest page directory as accessed */
2192# if PGM_GST_TYPE == PGM_TYPE_AMD64
2193 pPml4eSrc->n.u1Accessed = 1;
2194 pPdpeSrc->lm.u1Accessed = 1;
2195# endif
2196 pPdeSrc->b.u1Accessed = 1;
2197
2198 /*
2199 * Only write protection page faults are relevant here.
2200 */
2201 if (fWriteFault)
2202 {
2203 /* Mark guest page directory as dirty (BIG page only). */
2204 pPdeSrc->b.u1Dirty = 1;
2205
2206 if (pPdeDst->n.u1Present)
2207 {
2208 if (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY)
2209 {
2210 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2211 Assert(pPdeSrc->b.u1Write);
2212
2213 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2214 * fault again and take this path to only invalidate the entry.
2215 */
2216 pPdeDst->n.u1Write = 1;
2217 pPdeDst->n.u1Accessed = 1;
2218 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2219 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2220 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2221 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2222 }
2223# ifdef IN_RING0
2224 else
2225 /* Check for stale TLB entry; only applies to the SMP guest case. */
2226 if ( pVM->cCPUs > 1
2227 && pPdeDst->n.u1Write
2228 && pPdeDst->n.u1Accessed)
2229 {
2230 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2231 if (pShwPage)
2232 {
2233 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2234 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2235 if ( pPteDst->n.u1Present
2236 && pPteDst->n.u1Write)
2237 {
2238 /* Stale TLB entry. */
2239 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2240 PGM_INVL_PG(pVCpu, GCPtrPage);
2241
2242 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2243 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2244 }
2245 }
2246 }
2247# endif /* IN_RING0 */
2248 }
2249 }
2250 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2251 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2252 }
2253 /* else: 4KB page table */
2254
2255 /*
2256 * Map the guest page table.
2257 */
2258 PGSTPT pPTSrc;
2259 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2260 if (RT_SUCCESS(rc))
2261 {
2262 /*
2263 * Real page fault?
2264 */
2265 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2266 const GSTPTE PteSrc = *pPteSrc;
2267 if ( !PteSrc.n.u1Present
2268# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2269 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2270# endif
2271 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2272 || (fUserLevelFault && !PteSrc.n.u1User)
2273 )
2274 {
2275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2276 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2277 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2278
2279 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2280 * See the 2nd case above as well.
2281 */
2282 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2283 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2284
2285 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2286 return VINF_EM_RAW_GUEST_TRAP;
2287 }
2288 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2289
2290 /*
2291 * Set the accessed bits in the page directory and the page table.
2292 */
2293# if PGM_GST_TYPE == PGM_TYPE_AMD64
2294 pPml4eSrc->n.u1Accessed = 1;
2295 pPdpeSrc->lm.u1Accessed = 1;
2296# endif
2297 pPdeSrc->n.u1Accessed = 1;
2298 pPteSrc->n.u1Accessed = 1;
2299
2300 /*
2301 * Only write protection page faults are relevant here.
2302 */
2303 if (fWriteFault)
2304 {
2305 /* Write access, so mark guest entry as dirty. */
2306# ifdef VBOX_WITH_STATISTICS
2307 if (!pPteSrc->n.u1Dirty)
2308 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2309 else
2310 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2311# endif
2312
2313 pPteSrc->n.u1Dirty = 1;
2314
2315 if (pPdeDst->n.u1Present)
2316 {
2317#ifndef IN_RING0
2318 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2319 * Our individual shadow handlers will provide more information and force a fatal exit.
2320 */
2321 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2322 {
2323 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2324 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2325 return VINF_SUCCESS;
2326 }
2327#endif
2328 /*
2329 * Map shadow page table.
2330 */
2331 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2332 if (pShwPage)
2333 {
2334 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2335 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2336 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2337 {
2338 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2339 {
2340 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2341# ifdef VBOX_STRICT
2342 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2343 if (pPage)
2344 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2345 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2346# endif
2347 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2348
2349 Assert(pPteSrc->n.u1Write);
2350
2351 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2352 * fault again and take this path to only invalidate the entry.
2353 */
2354 pPteDst->n.u1Write = 1;
2355 pPteDst->n.u1Dirty = 1;
2356 pPteDst->n.u1Accessed = 1;
2357 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2358 PGM_INVL_PG(pVCpu, GCPtrPage);
2359
2360 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2361 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2362 }
2363# ifdef IN_RING0
2364 else
2365 /* Check for stale TLB entry; only applies to the SMP guest case. */
2366 if ( pVM->cCPUs > 1
2367 && pPteDst->n.u1Write == 1
2368 && pPteDst->n.u1Accessed == 1)
2369 {
2370 /* Stale TLB entry. */
2371 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2372 PGM_INVL_PG(pVCpu, GCPtrPage);
2373
2374 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2375 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2376 }
2377# endif
2378 }
2379 }
2380 else
2381 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2382 }
2383 }
2384/** @todo Optimize accessed bit emulation? */
2385# ifdef VBOX_STRICT
2386 /*
2387 * Sanity check.
2388 */
2389 else if ( !pPteSrc->n.u1Dirty
2390 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2391 && pPdeDst->n.u1Present)
2392 {
2393 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2394 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2395 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2396 if ( pPteDst->n.u1Present
2397 && pPteDst->n.u1Write)
2398 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2399 }
2400# endif /* VBOX_STRICT */
2401 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2402 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2403 }
2404 AssertRC(rc);
2405 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2406 return rc;
2407
2408
2409l_UpperLevelPageFault:
2410 /*
2411 * Pagefault detected while checking the PML4E, PDPE or PDE.
2412 * Single exit handler to get rid of duplicate code paths.
2413 */
2414 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2415 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2416 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2417
2418 if (
2419# if PGM_GST_TYPE == PGM_TYPE_AMD64
2420 pPml4eSrc->n.u1Present &&
2421# endif
2422# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2423 pPdpeSrc->n.u1Present &&
2424# endif
2425 pPdeSrc->n.u1Present)
2426 {
2427 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2428 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2429 {
2430 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2431 }
2432 else
2433 {
2434 /*
2435 * Map the guest page table.
2436 */
2437 PGSTPT pPTSrc;
2438 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2439 if (RT_SUCCESS(rc))
2440 {
2441 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2442 const GSTPTE PteSrc = *pPteSrc;
2443 if (pPteSrc->n.u1Present)
2444 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2445 }
2446 AssertRC(rc);
2447 }
2448 }
2449 return VINF_EM_RAW_GUEST_TRAP;
2450}
2451#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2452
2453
2454/**
2455 * Sync a shadow page table.
2456 *
2457 * The shadow page table is not present. This includes the case where
2458 * there is a conflict with a mapping.
2459 *
2460 * @returns VBox status code.
2461 * @param pVCpu The VMCPU handle.
2462 * @param iPDSrc Page directory index.
2463 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2464 * Assume this is a temporary mapping.
2465 * @param GCPtrPage GC pointer of the page that caused the fault.
2466 */
2467PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2468{
2469 PVM pVM = pVCpu->CTX_SUFF(pVM);
2470 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2471
2472 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2473 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2474 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2475
2476 Assert(PGMIsLocked(pVM));
2477
2478#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2479 || PGM_GST_TYPE == PGM_TYPE_PAE \
2480 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2481 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2482 && PGM_SHW_TYPE != PGM_TYPE_EPT
2483
2484 int rc = VINF_SUCCESS;
2485
2486 /*
2487 * Validate input a little bit.
2488 */
2489 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2490# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2491 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2492 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2493
2494 /* Fetch the pgm pool shadow descriptor. */
2495 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2496 Assert(pShwPde);
2497
2498# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2499 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2500 PPGMPOOLPAGE pShwPde = NULL;
2501 PX86PDPAE pPDDst;
2502 PSHWPDE pPdeDst;
2503
2504 /* Fetch the pgm pool shadow descriptor. */
2505 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2506 AssertRCSuccessReturn(rc, rc);
2507 Assert(pShwPde);
2508
2509 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2510 pPdeDst = &pPDDst->a[iPDDst];
2511
2512# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2513 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2514 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2515 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2516 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2517 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2518 AssertRCSuccessReturn(rc, rc);
2519 Assert(pPDDst);
2520 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2521# endif
2522 SHWPDE PdeDst = *pPdeDst;
2523
2524# if PGM_GST_TYPE == PGM_TYPE_AMD64
2525 /* Fetch the pgm pool shadow descriptor. */
2526 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2527 Assert(pShwPde);
2528# endif
2529
2530# ifndef PGM_WITHOUT_MAPPINGS
2531 /*
2532 * Check for conflicts.
2533 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2534 * HC: Simply resolve the conflict.
2535 */
2536 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2537 {
2538 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2539# ifndef IN_RING3
2540 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2541 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2542 return VERR_ADDRESS_CONFLICT;
2543# else
2544 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2545 Assert(pMapping);
2546# if PGM_GST_TYPE == PGM_TYPE_32BIT
2547 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2548# elif PGM_GST_TYPE == PGM_TYPE_PAE
2549 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2550# else
2551 AssertFailed(); /* can't happen for amd64 */
2552# endif
2553 if (RT_FAILURE(rc))
2554 {
2555 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2556 return rc;
2557 }
2558 PdeDst = *pPdeDst;
2559# endif
2560 }
2561# else /* PGM_WITHOUT_MAPPINGS */
2562 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2563# endif /* PGM_WITHOUT_MAPPINGS */
2564 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2565
2566# if defined(IN_RC)
2567 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2568 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2569# endif
2570
2571 /*
2572 * Sync page directory entry.
2573 */
2574 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2575 if (PdeSrc.n.u1Present)
2576 {
2577 /*
2578 * Allocate & map the page table.
2579 */
2580 PSHWPT pPTDst;
2581# if PGM_GST_TYPE == PGM_TYPE_AMD64
2582 const bool fPageTable = !PdeSrc.b.u1Size;
2583# else
2584 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2585# endif
2586 PPGMPOOLPAGE pShwPage;
2587 RTGCPHYS GCPhys;
2588 if (fPageTable)
2589 {
2590 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2591# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2592 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2593 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2594# endif
2595 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2596 }
2597 else
2598 {
2599 PGMPOOLACCESS enmAccess;
2600
2601 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2602# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2603 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2604 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2605# endif
2606 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2607 if (PdeSrc.n.u1User)
2608 {
2609 if (PdeSrc.n.u1Write)
2610 enmAccess = PGMPOOLACCESS_USER_RW;
2611 else
2612 enmAccess = PGMPOOLACCESS_USER_R;
2613 }
2614 else
2615 {
2616 if (PdeSrc.n.u1Write)
2617 enmAccess = PGMPOOLACCESS_SUPERVISOR_RW;
2618 else
2619 enmAccess = PGMPOOLACCESS_SUPERVISOR_R;
2620 }
2621 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2622 }
2623 if (rc == VINF_SUCCESS)
2624 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2625 else if (rc == VINF_PGM_CACHED_PAGE)
2626 {
2627 /*
2628 * The PT was cached, just hook it up.
2629 */
2630 if (fPageTable)
2631 PdeDst.u = pShwPage->Core.Key
2632 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2633 else
2634 {
2635 PdeDst.u = pShwPage->Core.Key
2636 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2637 /* (see explanation and assumptions further down.) */
2638 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2639 {
2640 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2641 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2642 PdeDst.b.u1Write = 0;
2643 }
2644 }
2645 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2646# if defined(IN_RC)
2647 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2648# endif
2649 return VINF_SUCCESS;
2650 }
2651 else if (rc == VERR_PGM_POOL_FLUSHED)
2652 {
2653 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2654# if defined(IN_RC)
2655 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2656# endif
2657 return VINF_PGM_SYNC_CR3;
2658 }
2659 else
2660 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2661 PdeDst.u &= X86_PDE_AVL_MASK;
2662 PdeDst.u |= pShwPage->Core.Key;
2663
2664 /*
2665 * Page directory has been accessed (this is a fault situation, remember).
2666 */
2667 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2668 if (fPageTable)
2669 {
2670 /*
2671 * Page table - 4KB.
2672 *
2673 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2674 */
2675 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2676 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2677 PGSTPT pPTSrc;
2678 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2679 if (RT_SUCCESS(rc))
2680 {
2681 /*
2682 * Start by syncing the page directory entry so CSAM's TLB trick works.
2683 */
2684 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2685 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2686 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2687# if defined(IN_RC)
2688 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2689# endif
2690
2691 /*
2692 * Directory/page user or supervisor privilege: (same goes for read/write)
2693 *
2694 * Directory Page Combined
2695 * U/S U/S U/S
2696 * 0 0 0
2697 * 0 1 0
2698 * 1 0 0
2699 * 1 1 1
2700 *
2701 * Simple AND operation. Table listed for completeness.
2702 *
2703 */
2704 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2705# ifdef PGM_SYNC_N_PAGES
2706 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2707 unsigned iPTDst = iPTBase;
2708 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2709 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2710 iPTDst = 0;
2711 else
2712 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2713# else /* !PGM_SYNC_N_PAGES */
2714 unsigned iPTDst = 0;
2715 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2716# endif /* !PGM_SYNC_N_PAGES */
2717# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2718 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2719 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2720# else
2721 const unsigned offPTSrc = 0;
2722# endif
2723 for (; iPTDst < iPTDstEnd; iPTDst++)
2724 {
2725 const unsigned iPTSrc = iPTDst + offPTSrc;
2726 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2727
2728 if (PteSrc.n.u1Present) /* we've already cleared it above */
2729 {
2730# ifndef IN_RING0
2731 /*
2732 * Assuming kernel code will be marked as supervisor - and not as user level
2733 * and executed using a conforming code selector - and marked as readonly.
2734 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2735 */
2736 PPGMPAGE pPage;
2737 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2738 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2739 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2740 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2741 )
2742# endif
2743 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2744 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2745 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2746 PteSrc.n.u1Present,
2747 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2748 PteSrc.n.u1User & PdeSrc.n.u1User,
2749 (uint64_t)PteSrc.u,
2750 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2751 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2752 }
2753 } /* for PTEs */
2754 }
2755 }
2756 else
2757 {
2758 /*
2759 * Big page - 2/4MB.
2760 *
2761 * We'll walk the ram range list in parallel and optimize lookups.
2762 * We will only sync one shadow page table at a time.
2763 */
2764 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2765
2766 /**
2767 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2768 */
2769
2770 /*
2771 * Start by syncing the page directory entry.
2772 */
2773 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2774 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2775
2776 /*
2777 * If the page is not flagged as dirty and is writable, then make it read-only
2778 * at PD level, so we can set the dirty bit when the page is modified.
2779 *
2780 * ASSUMES that page access handlers are implemented on page table entry level.
2781 * Thus we will first catch the dirty access and set PDE.D and restart. If
2782 * there is an access handler, we'll trap again and let it work on the problem.
2783 */
2784 /** @todo move the above stuff to a section in the PGM documentation. */
2785 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2786 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2787 {
2788 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2789 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2790 PdeDst.b.u1Write = 0;
2791 }
2792 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2793# if defined(IN_RC)
2794 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2795# endif
2796
2797 /*
2798 * Fill the shadow page table.
2799 */
2800 /* Get address and flags from the source PDE. */
2801 SHWPTE PteDstBase;
2802 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2803
2804 /* Loop thru the entries in the shadow PT. */
2805 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2806 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2807 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2808 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2809 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2810 unsigned iPTDst = 0;
2811 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2812 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2813 {
2814 /* Advance ram range list. */
2815 while (pRam && GCPhys > pRam->GCPhysLast)
2816 pRam = pRam->CTX_SUFF(pNext);
2817 if (pRam && GCPhys >= pRam->GCPhys)
2818 {
2819 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2820 do
2821 {
2822 /* Make shadow PTE. */
2823 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2824 SHWPTE PteDst;
2825
2826# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2827 /* Try make the page writable if necessary. */
2828 if ( PteDstBase.n.u1Write
2829 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2830 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2831 {
2832 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2833 AssertRCReturn(rc, rc);
2834 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2835 break;
2836 }
2837# endif
2838
2839 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2840 {
2841 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2842 {
2843 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2844 PteDst.n.u1Write = 0;
2845 }
2846 else
2847 PteDst.u = 0;
2848 }
2849# ifndef IN_RING0
2850 /*
2851 * Assuming kernel code will be marked as supervisor and not as user level and executed
2852 * using a conforming code selector. Don't check for readonly, as that implies the whole
2853 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2854 */
2855 else if ( !PdeSrc.n.u1User
2856 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2857 PteDst.u = 0;
2858# endif
2859 else
2860 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2861
2862 /* Only map writable pages writable. */
2863 if ( PteDst.n.u1Write
2864 && PteDst.n.u1Present
2865 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2866 {
2867 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2868 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2869 }
2870
2871# ifdef PGMPOOL_WITH_USER_TRACKING
2872 if (PteDst.n.u1Present)
2873 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2874# endif
2875 /* commit it */
2876 pPTDst->a[iPTDst] = PteDst;
2877 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2878 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2879 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2880
2881 /* advance */
2882 GCPhys += PAGE_SIZE;
2883 iHCPage++;
2884 iPTDst++;
2885 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2886 && GCPhys <= pRam->GCPhysLast);
2887 }
2888 else if (pRam)
2889 {
2890 Log(("Invalid pages at %RGp\n", GCPhys));
2891 do
2892 {
2893 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2894 GCPhys += PAGE_SIZE;
2895 iPTDst++;
2896 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2897 && GCPhys < pRam->GCPhys);
2898 }
2899 else
2900 {
2901 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2902 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2903 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2904 }
2905 } /* while more PTEs */
2906 } /* 4KB / 4MB */
2907 }
2908 else
2909 AssertRelease(!PdeDst.n.u1Present);
2910
2911 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2912 if (RT_FAILURE(rc))
2913 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2914 return rc;
2915
2916#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2917 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2918 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2919 && !defined(IN_RC)
2920
2921 /*
2922 * Validate input a little bit.
2923 */
2924 int rc = VINF_SUCCESS;
2925# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2926 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2927 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2928
2929 /* Fetch the pgm pool shadow descriptor. */
2930 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2931 Assert(pShwPde);
2932
2933# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2934 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2935 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2936 PX86PDPAE pPDDst;
2937 PSHWPDE pPdeDst;
2938
2939 /* Fetch the pgm pool shadow descriptor. */
2940 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2941 AssertRCSuccessReturn(rc, rc);
2942 Assert(pShwPde);
2943
2944 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2945 pPdeDst = &pPDDst->a[iPDDst];
2946
2947# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2948 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2949 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2950 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2951 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
2952 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2953 AssertRCSuccessReturn(rc, rc);
2954 Assert(pPDDst);
2955 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2956
2957 /* Fetch the pgm pool shadow descriptor. */
2958 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2959 Assert(pShwPde);
2960
2961# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2962 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2963 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2964 PEPTPD pPDDst;
2965 PEPTPDPT pPdptDst;
2966
2967 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2968 if (rc != VINF_SUCCESS)
2969 {
2970 AssertRC(rc);
2971 return rc;
2972 }
2973 Assert(pPDDst);
2974 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2975
2976 /* Fetch the pgm pool shadow descriptor. */
2977 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2978 Assert(pShwPde);
2979# endif
2980 SHWPDE PdeDst = *pPdeDst;
2981
2982 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2983 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2984
2985 GSTPDE PdeSrc;
2986 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2987 PdeSrc.n.u1Present = 1;
2988 PdeSrc.n.u1Write = 1;
2989 PdeSrc.n.u1Accessed = 1;
2990 PdeSrc.n.u1User = 1;
2991
2992 /*
2993 * Allocate & map the page table.
2994 */
2995 PSHWPT pPTDst;
2996 PPGMPOOLPAGE pShwPage;
2997 RTGCPHYS GCPhys;
2998
2999 /* Virtual address = physical address */
3000 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3001 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
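    /* Illustrative note: the pool page is keyed by the physical address rounded down to a
     * page-table-sized boundary, e.g. (sketch, assuming SHW_PD_SHIFT == 21 as it is for
     * PAE/AMD64/EPT shadow paging):
     * @code
     *     GCPtrPage = 0x0030f000  ->  GCPhys = 0x0030f000  ->  pool key = 0x00200000
     * @endcode
     * so all pages in the same 2 MB region share one shadow page table.
     */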
3002
3003 if ( rc == VINF_SUCCESS
3004 || rc == VINF_PGM_CACHED_PAGE)
3005 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3006 else
3007 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3008
3009 PdeDst.u &= X86_PDE_AVL_MASK;
3010 PdeDst.u |= pShwPage->Core.Key;
3011 PdeDst.n.u1Present = 1;
3012 PdeDst.n.u1Write = 1;
3013# if PGM_SHW_TYPE == PGM_TYPE_EPT
3014 PdeDst.n.u1Execute = 1;
3015# else
3016 PdeDst.n.u1User = 1;
3017 PdeDst.n.u1Accessed = 1;
3018# endif
3019 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3020
3021 pgmLock(pVM);
3022 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3023 pgmUnlock(pVM);
3024 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3025 return rc;
3026
3027#else
3028 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3029 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3030 return VERR_INTERNAL_ERROR;
3031#endif
3032}
3033
3034
3035
3036/**
3037 * Prefetch a page/set of pages.
3038 *
3039 * Typically used to sync commonly used pages before entering raw mode
3040 * after a CR3 reload.
3041 *
3042 * @returns VBox status code.
3043 * @param pVCpu The VMCPU handle.
3044 * @param GCPtrPage Page to prefetch.
3045 */
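/* Normally reached via PGMPrefetchPage(), which dispatches to the instantiation
   matching the current guest/shadow mode pair; the exact call path is inferred
   from the mode dispatch tables rather than spelled out here. */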
3046PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3047{
3048#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3049 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3050 /*
3051 * Check that all Guest levels thru the PDE are present, getting the
3052 * PD and PDE in the process.
3053 */
3054 int rc = VINF_SUCCESS;
3055# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3056# if PGM_GST_TYPE == PGM_TYPE_32BIT
3057 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3058 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3059# elif PGM_GST_TYPE == PGM_TYPE_PAE
3060 unsigned iPDSrc;
3061 X86PDPE PdpeSrc;
3062 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3063 if (!pPDSrc)
3064 return VINF_SUCCESS; /* not present */
3065# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3066 unsigned iPDSrc;
3067 PX86PML4E pPml4eSrc;
3068 X86PDPE PdpeSrc;
3069 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3070 if (!pPDSrc)
3071 return VINF_SUCCESS; /* not present */
3072# endif
3073 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3074# else
3075 PGSTPD pPDSrc = NULL;
3076 const unsigned iPDSrc = 0;
3077 GSTPDE PdeSrc;
3078
3079 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3080 PdeSrc.n.u1Present = 1;
3081 PdeSrc.n.u1Write = 1;
3082 PdeSrc.n.u1Accessed = 1;
3083 PdeSrc.n.u1User = 1;
3084# endif
3085
3086 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3087 {
3088 PVM pVM = pVCpu->CTX_SUFF(pVM);
3089 pgmLock(pVM);
3090
3091# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3092 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3093# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3094 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3095 PX86PDPAE pPDDst;
3096 X86PDEPAE PdeDst;
3097# if PGM_GST_TYPE != PGM_TYPE_PAE
3098 X86PDPE PdpeSrc;
3099
3100 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3101 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3102# endif
3103 int rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3104 if (rc != VINF_SUCCESS)
3105 {
3106 pgmUnlock(pVM);
3107 AssertRC(rc);
3108 return rc;
3109 }
3110 Assert(pPDDst);
3111 PdeDst = pPDDst->a[iPDDst];
3112
3113# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3114 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3115 PX86PDPAE pPDDst;
3116 X86PDEPAE PdeDst;
3117
3118# if PGM_GST_TYPE == PGM_TYPE_PROT
3119 /* AMD-V nested paging */
3120 X86PML4E Pml4eSrc;
3121 X86PDPE PdpeSrc;
3122 PX86PML4E pPml4eSrc = &Pml4eSrc;
3123
3124 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3125 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3126 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3127# endif
3128
3129 int rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3130 if (rc != VINF_SUCCESS)
3131 {
3132 pgmUnlock(pVM);
3133 AssertRC(rc);
3134 return rc;
3135 }
3136 Assert(pPDDst);
3137 PdeDst = pPDDst->a[iPDDst];
3138# endif
3139 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3140 {
3141 if (!PdeDst.n.u1Present)
3142 {
3143 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3144 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3145 }
3146 else
3147 {
3148 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3149 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3150 * makes no sense to prefetch more than one page.
3151 */
3152 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3153 if (RT_SUCCESS(rc))
3154 rc = VINF_SUCCESS;
3155 }
3156 }
3157 pgmUnlock(pVM);
3158 }
3159 return rc;
3160
3161#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3162 return VINF_SUCCESS; /* ignore */
3163#endif
3164}
3165
3166
3167
3168
3169/**
3170 * Syncs a page during a PGMVerifyAccess() call.
3171 *
3172 * @returns VBox status code (informational included).
3173 * @param pVCpu The VMCPU handle.
3174 * @param GCPtrPage The address of the page to sync.
3175 * @param fPage The effective guest page flags.
3176 * @param uErr The trap error code.
3177 */
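/* Presumably reached via PGMVerifyAccess(), which raw-mode components such as
   CSAM/PATM use to make sure a guest page is present and synced before they
   touch it; that call path is an assumption based on the CSAM usage below. */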
3178PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3179{
3180 PVM pVM = pVCpu->CTX_SUFF(pVM);
3181
3182 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3183
3184 Assert(!HWACCMIsNestedPagingActive(pVM));
3185#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3186 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3187
3188# ifndef IN_RING0
3189 if (!(fPage & X86_PTE_US))
3190 {
3191 /*
3192 * Mark this page as safe.
3193 */
3194 /** @todo not correct for pages that contain both code and data!! */
3195 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3196 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3197 }
3198# endif
3199
3200 /*
3201 * Get guest PD and index.
3202 */
3203# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3204# if PGM_GST_TYPE == PGM_TYPE_32BIT
3205 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3206 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3207# elif PGM_GST_TYPE == PGM_TYPE_PAE
3208 unsigned iPDSrc = 0;
3209 X86PDPE PdpeSrc;
3210 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3211
3212 if (!pPDSrc)
3213 {
3214 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3215 return VINF_EM_RAW_GUEST_TRAP;
3216 }
3217# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3218 unsigned iPDSrc;
3219 PX86PML4E pPml4eSrc;
3220 X86PDPE PdpeSrc;
3221 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3222 if (!pPDSrc)
3223 {
3224 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3225 return VINF_EM_RAW_GUEST_TRAP;
3226 }
3227# endif
3228# else
3229 PGSTPD pPDSrc = NULL;
3230 const unsigned iPDSrc = 0;
3231# endif
3232 int rc = VINF_SUCCESS;
3233
3234 pgmLock(pVM);
3235
3236 /*
3237 * First check if the shadow pd is present.
3238 */
3239# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3240 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3241# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3242 PX86PDEPAE pPdeDst;
3243 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3244 PX86PDPAE pPDDst;
3245# if PGM_GST_TYPE != PGM_TYPE_PAE
3246 X86PDPE PdpeSrc;
3247
3248 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3249 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3250# endif
3251 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3252 if (rc != VINF_SUCCESS)
3253 {
3254 pgmUnlock(pVM);
3255 AssertRC(rc);
3256 return rc;
3257 }
3258 Assert(pPDDst);
3259 pPdeDst = &pPDDst->a[iPDDst];
3260
3261# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3262 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3263 PX86PDPAE pPDDst;
3264 PX86PDEPAE pPdeDst;
3265
3266# if PGM_GST_TYPE == PGM_TYPE_PROT
3267 /* AMD-V nested paging */
3268 X86PML4E Pml4eSrc;
3269 X86PDPE PdpeSrc;
3270 PX86PML4E pPml4eSrc = &Pml4eSrc;
3271
3272 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3273 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3274 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3275# endif
3276
3277 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3278 if (rc != VINF_SUCCESS)
3279 {
3280 pgmUnlock(pVM);
3281 AssertRC(rc);
3282 return rc;
3283 }
3284 Assert(pPDDst);
3285 pPdeDst = &pPDDst->a[iPDDst];
3286# endif
3287
3288# if defined(IN_RC)
3289 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3290 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3291# endif
3292
3293 if (!pPdeDst->n.u1Present)
3294 {
3295 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3296 if (rc != VINF_SUCCESS)
3297 {
3298# if defined(IN_RC)
3299 /* Release the lock on the dynamic pPdeDst mapping again. */
3300 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3301# endif
3302 pgmUnlock(pVM);
3303 AssertRC(rc);
3304 return rc;
3305 }
3306 }
3307
3308# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3309 /* Check for dirty bit fault */
3310 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3311 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3312 Log(("PGMVerifyAccess: success (dirty)\n"));
3313 else
3314 {
3315 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3316# else
3317 {
3318 GSTPDE PdeSrc;
3319 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3320 PdeSrc.n.u1Present = 1;
3321 PdeSrc.n.u1Write = 1;
3322 PdeSrc.n.u1Accessed = 1;
3323 PdeSrc.n.u1User = 1;
3324
3325# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3326 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3327 if (uErr & X86_TRAP_PF_US)
3328 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3329 else /* supervisor */
3330 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3331
3332 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3333 if (RT_SUCCESS(rc))
3334 {
3335 /* Page was successfully synced */
3336 Log2(("PGMVerifyAccess: success (sync)\n"));
3337 rc = VINF_SUCCESS;
3338 }
3339 else
3340 {
3341 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3342 rc = VINF_EM_RAW_GUEST_TRAP;
3343 }
3344 }
3345# if defined(IN_RC)
3346 /* Release the lock on the dynamic pPdeDst mapping again. */
3347 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3348# endif
3349 pgmUnlock(pVM);
3350 return rc;
3351
3352#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3353
3354 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3355 return VERR_INTERNAL_ERROR;
3356#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3357}
3358
3359#undef MY_STAM_COUNTER_INC
3360#define MY_STAM_COUNTER_INC(a) do { } while (0)
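/* From here on MY_STAM_COUNTER_INC is a no-op, presumably so that any remaining
   statistics macros in the code below compile away in this template. */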
3361
3362
3363/**
3364 * Syncs the paging hierarchy starting at CR3.
3365 *
3366 * @returns VBox status code, no specials.
3367 * @param pVCpu The VMCPU handle.
3368 * @param cr0 Guest context CR0 register
3369 * @param cr3 Guest context CR3 register
3370 * @param cr4 Guest context CR4 register
3371 * @param fGlobal Including global page directories or not
3372 */
3373PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3374{
3375 PVM pVM = pVCpu->CTX_SUFF(pVM);
3376
3377 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
3378 fGlobal = true; /* Change this CR3 reload to be a global one. */
3379
3380 LogFlow(("SyncCR3 %d\n", fGlobal));
3381
3382#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3383 /*
3384 * Update page access handlers.
3385 * The virtual handlers are always flushed, while the physical handlers are only flushed on demand.
3386 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3387 * have to look into that later because it has a bad influence on performance.
3388 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3389 * bird: Yes, but that won't work for aliases.
3390 */
3391 /** @todo this MUST go away. See #1557. */
3392 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3393 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3394 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3395#endif
3396
3397#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3398 /*
3399 * Nested / EPT - almost no work.
3400 */
3401 /** @todo check if this is really necessary; the call does it as well... */
3402 HWACCMFlushTLB(pVCpu);
3403 return VINF_SUCCESS;
3404
3405#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3406 /*
3407 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3408 * out the shadow parts when the guest modifies its tables.
3409 */
3410 return VINF_SUCCESS;
3411
3412#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3413
3414# ifdef PGM_WITHOUT_MAPPINGS
3415 Assert(pVM->pgm.s.fMappingsFixed);
3416 return VINF_SUCCESS;
3417# else
3418 /* Nothing to do when mappings are fixed. */
3419 if (pVM->pgm.s.fMappingsFixed)
3420 return VINF_SUCCESS;
3421
3422 int rc = PGMMapResolveConflicts(pVM);
3423 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3424 if (rc == VINF_PGM_SYNC_CR3)
3425 {
3426 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3427 return VINF_PGM_SYNC_CR3;
3428 }
3429# endif
3430 return VINF_SUCCESS;
3431#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3432}
3433
3434
3435
3436
3437#ifdef VBOX_STRICT
3438#ifdef IN_RC
3439# undef AssertMsgFailed
3440# define AssertMsgFailed Log
3441#endif
3442#ifdef IN_RING3
3443# include <VBox/dbgf.h>
3444
3445/**
3446 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3447 *
3448 * @returns VBox status code (VINF_SUCCESS).
3449 * @param cr3 The root of the hierarchy.
3450 * @param cr4 The cr4, only PAE and PSE are currently used.
3451 * @param fLongMode Set if long mode, false if not long mode.
3452 * @param cMaxDepth Number of levels to dump.
3453 * @param pHlp Pointer to the output functions.
3454 */
3455RT_C_DECLS_BEGIN
3456VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3457RT_C_DECLS_END
3458
3459#endif
3460
3461/**
3462 * Checks that the shadow page table is in sync with the guest one.
3463 *
3464 * @returns The number of errors.
3465 * @param pVM The virtual machine.
3466 * @param pVCpu The VMCPU handle.
3467 * @param cr3 Guest context CR3 register
3468 * @param cr4 Guest context CR4 register
3469 * @param GCPtr Where to start. Defaults to 0.
3470 * @param cb How much to check. Defaults to everything.
3471 */
3472PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3473{
3474#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3475 return 0;
3476#else
3477 unsigned cErrors = 0;
3478 PVM pVM = pVCpu->CTX_SUFF(pVM);
3479 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3480
3481#if PGM_GST_TYPE == PGM_TYPE_PAE
3482 /** @todo currently broken; crashes below somewhere */
3483 AssertFailed();
3484#endif
3485
3486#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3487 || PGM_GST_TYPE == PGM_TYPE_PAE \
3488 || PGM_GST_TYPE == PGM_TYPE_AMD64
3489
3490# if PGM_GST_TYPE == PGM_TYPE_AMD64
3491 bool fBigPagesSupported = true;
3492# else
3493 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
3494# endif
3495 PPGMCPU pPGM = &pVCpu->pgm.s;
3496 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3497 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3498# ifndef IN_RING0
3499 RTHCPHYS HCPhys; /* general usage. */
3500# endif
3501 int rc;
3502
3503 /*
3504 * Check that the Guest CR3 and all its mappings are correct.
3505 */
3506 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3507 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3508 false);
3509# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3510# if PGM_GST_TYPE == PGM_TYPE_32BIT
3511 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3512# else
3513 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3514# endif
3515 AssertRCReturn(rc, 1);
3516 HCPhys = NIL_RTHCPHYS;
3517 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3518 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3519# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3520 pgmGstGet32bitPDPtr(pPGM);
3521 RTGCPHYS GCPhys;
3522 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3523 AssertRCReturn(rc, 1);
3524 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3525# endif
3526# endif /* !IN_RING0 */
3527
3528 /*
3529 * Get and check the Shadow CR3.
3530 */
3531# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3532 unsigned cPDEs = X86_PG_ENTRIES;
3533 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3534# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3535# if PGM_GST_TYPE == PGM_TYPE_32BIT
3536 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3537# else
3538 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3539# endif
3540 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3541# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3542 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3543 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3544# endif
3545 if (cb != ~(RTGCPTR)0)
3546 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
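 /* For the usual full-range call (cb == ~(RTGCPTR)0) the loop below visits
    cPDEs entries of cIncrement bytes each; e.g. with the common values for a
    32-bit shadow (1024 entries of 4 MB each) that covers the entire 4 GB space. */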
3547
3548/** @todo call the other two PGMAssert*() functions. */
3549
3550# if PGM_GST_TYPE == PGM_TYPE_AMD64
3551 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3552
3553 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3554 {
3555 PPGMPOOLPAGE pShwPdpt = NULL;
3556 PX86PML4E pPml4eSrc;
3557 PX86PML4E pPml4eDst;
3558 RTGCPHYS GCPhysPdptSrc;
3559
3560 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3561 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3562
3563 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3564 if (!pPml4eDst->n.u1Present)
3565 {
3566 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3567 continue;
3568 }
3569
3570 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3571 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3572
3573 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3574 {
3575 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3576 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3577 cErrors++;
3578 continue;
3579 }
3580
3581 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3582 {
3583 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3584 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3585 cErrors++;
3586 continue;
3587 }
3588
3589 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3590 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3591 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3592 {
3593 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3594 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3595 cErrors++;
3596 continue;
3597 }
3598# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3599 {
3600# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3601
3602# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3603 /*
3604 * Check the PDPTEs too.
3605 */
3606 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3607
3608 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3609 {
3610 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3611 PPGMPOOLPAGE pShwPde = NULL;
3612 PX86PDPE pPdpeDst;
3613 RTGCPHYS GCPhysPdeSrc;
3614# if PGM_GST_TYPE == PGM_TYPE_PAE
3615 X86PDPE PdpeSrc;
3616 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3617 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3618# else
3619 PX86PML4E pPml4eSrc;
3620 X86PDPE PdpeSrc;
3621 PX86PDPT pPdptDst;
3622 PX86PDPAE pPDDst;
3623 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3624
3625 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3626 if (rc != VINF_SUCCESS)
3627 {
3628 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3629 GCPtr += 512 * _2M;
3630 continue; /* next PDPTE */
3631 }
3632 Assert(pPDDst);
3633# endif
3634 Assert(iPDSrc == 0);
3635
3636 pPdpeDst = &pPdptDst->a[iPdpt];
3637
3638 if (!pPdpeDst->n.u1Present)
3639 {
3640 GCPtr += 512 * _2M;
3641 continue; /* next PDPTE */
3642 }
3643
3644 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3645 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3646
3647 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3648 {
3649 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3650 GCPtr += 512 * _2M;
3651 cErrors++;
3652 continue;
3653 }
3654
3655 if (GCPhysPdeSrc != pShwPde->GCPhys)
3656 {
3657# if PGM_GST_TYPE == PGM_TYPE_AMD64
3658 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3659# else
3660 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3661# endif
3662 GCPtr += 512 * _2M;
3663 cErrors++;
3664 continue;
3665 }
3666
3667# if PGM_GST_TYPE == PGM_TYPE_AMD64
3668 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3669 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3670 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3671 {
3672 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3673 GCPtr += 512 * _2M;
3674 cErrors++;
3675 continue;
3676 }
3677# endif
3678
3679# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3680 {
3681# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3682# if PGM_GST_TYPE == PGM_TYPE_32BIT
3683 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3684# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3685 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3686# endif
3687# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3688 /*
3689 * Iterate the shadow page directory.
3690 */
3691 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3692 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3693
3694 for (;
3695 iPDDst < cPDEs;
3696 iPDDst++, GCPtr += cIncrement)
3697 {
3698# if PGM_SHW_TYPE == PGM_TYPE_PAE
3699 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3700# else
3701 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3702# endif
3703 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3704 {
3705 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3706 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3707 {
3708 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3709 cErrors++;
3710 continue;
3711 }
3712 }
3713 else if ( (PdeDst.u & X86_PDE_P)
3714 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3715 )
3716 {
3717 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3718 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3719 if (!pPoolPage)
3720 {
3721 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3722 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3723 cErrors++;
3724 continue;
3725 }
3726 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3727
3728 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3729 {
3730 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3731 GCPtr, (uint64_t)PdeDst.u));
3732 cErrors++;
3733 }
3734
3735 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3736 {
3737 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3738 GCPtr, (uint64_t)PdeDst.u));
3739 cErrors++;
3740 }
3741
3742 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3743 if (!PdeSrc.n.u1Present)
3744 {
3745 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3746 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3747 cErrors++;
3748 continue;
3749 }
3750
3751 if ( !PdeSrc.b.u1Size
3752 || !fBigPagesSupported)
3753 {
3754 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3755# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3756 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3757# endif
3758 }
3759 else
3760 {
3761# if PGM_GST_TYPE == PGM_TYPE_32BIT
3762 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3763 {
3764 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3765 GCPtr, (uint64_t)PdeSrc.u));
3766 cErrors++;
3767 continue;
3768 }
3769# endif
3770 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3771# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3772 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3773# endif
3774 }
3775
3776 if ( pPoolPage->enmKind
3777 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3778 {
3779 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3780 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3781 cErrors++;
3782 }
3783
3784 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3785 if (!pPhysPage)
3786 {
3787 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3788 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3789 cErrors++;
3790 continue;
3791 }
3792
3793 if (GCPhysGst != pPoolPage->GCPhys)
3794 {
3795 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3796 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3797 cErrors++;
3798 continue;
3799 }
3800
3801 if ( !PdeSrc.b.u1Size
3802 || !fBigPagesSupported)
3803 {
3804 /*
3805 * Page Table.
3806 */
3807 const GSTPT *pPTSrc;
3808 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3809 if (RT_FAILURE(rc))
3810 {
3811 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3812 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3813 cErrors++;
3814 continue;
3815 }
3816 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3817 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3818 {
3819 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3820 // (This problem will go away when/if we shadow multiple CR3s.)
3821 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3822 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3823 cErrors++;
3824 continue;
3825 }
3826 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3827 {
3828 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3829 GCPtr, (uint64_t)PdeDst.u));
3830 cErrors++;
3831 continue;
3832 }
3833
3834 /* iterate the page table. */
3835# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3836 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3837 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3838# else
3839 const unsigned offPTSrc = 0;
3840# endif
3841 for (unsigned iPT = 0, off = 0;
3842 iPT < RT_ELEMENTS(pPTDst->a);
3843 iPT++, off += PAGE_SIZE)
3844 {
3845 const SHWPTE PteDst = pPTDst->a[iPT];
3846
3847 /* skip not-present entries. */
3848 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3849 continue;
3850 Assert(PteDst.n.u1Present);
3851
3852 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3853 if (!PteSrc.n.u1Present)
3854 {
3855# ifdef IN_RING3
3856 PGMAssertHandlerAndFlagsInSync(pVM);
3857 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3858# endif
3859 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3860 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3861 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3862 cErrors++;
3863 continue;
3864 }
3865
3866 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3867# if 1 /** @todo sync accessed bit properly... */
3868 fIgnoreFlags |= X86_PTE_A;
3869# endif
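 /* fIgnoreFlags collects the bits allowed to differ between guest and shadow
    PTEs; the check at the end of this loop masks them out on both sides,
    roughly (PteSrc.u & ~fIgnoreFlags) == (PteDst.u & ~fIgnoreFlags), with a
    second pass that also ignores X86_PTE_RW on the guest side to tolerate
    shadow entries that are still write-protected. */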
3870
3871 /* match the physical addresses */
3872 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3873 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3874
3875# ifdef IN_RING3
3876 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3877 if (RT_FAILURE(rc))
3878 {
3879 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3880 {
3881 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3882 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3883 cErrors++;
3884 continue;
3885 }
3886 }
3887 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3888 {
3889 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3890 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3891 cErrors++;
3892 continue;
3893 }
3894# endif
3895
3896 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3897 if (!pPhysPage)
3898 {
3899# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3900 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3901 {
3902 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3903 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3904 cErrors++;
3905 continue;
3906 }
3907# endif
3908 if (PteDst.n.u1Write)
3909 {
3910 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3911 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3912 cErrors++;
3913 }
3914 fIgnoreFlags |= X86_PTE_RW;
3915 }
3916 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3917 {
3918 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3919 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3920 cErrors++;
3921 continue;
3922 }
3923
3924 /* flags */
3925 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3926 {
3927 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3928 {
3929 if (PteDst.n.u1Write)
3930 {
3931 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3932 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3933 cErrors++;
3934 continue;
3935 }
3936 fIgnoreFlags |= X86_PTE_RW;
3937 }
3938 else
3939 {
3940 if (PteDst.n.u1Present)
3941 {
3942 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3943 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3944 cErrors++;
3945 continue;
3946 }
3947 fIgnoreFlags |= X86_PTE_P;
3948 }
3949 }
3950 else
3951 {
3952 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3953 {
3954 if (PteDst.n.u1Write)
3955 {
3956 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3957 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3958 cErrors++;
3959 continue;
3960 }
3961 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3962 {
3963 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3964 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3965 cErrors++;
3966 continue;
3967 }
3968 if (PteDst.n.u1Dirty)
3969 {
3970 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3971 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3972 cErrors++;
3973 }
3974# if 0 /** @todo sync access bit properly... */
3975 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3976 {
3977 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3978 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3979 cErrors++;
3980 }
3981 fIgnoreFlags |= X86_PTE_RW;
3982# else
3983 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3984# endif
3985 }
3986 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3987 {
3988 /* access bit emulation (not implemented). */
3989 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3990 {
3991 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3992 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3993 cErrors++;
3994 continue;
3995 }
3996 if (!PteDst.n.u1Accessed)
3997 {
3998 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3999 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4000 cErrors++;
4001 }
4002 fIgnoreFlags |= X86_PTE_P;
4003 }
4004# ifdef DEBUG_sandervl
4005 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4006# endif
4007 }
4008
4009 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4010 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4011 )
4012 {
4013 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4014 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4015 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4016 cErrors++;
4017 continue;
4018 }
4019 } /* foreach PTE */
4020 }
4021 else
4022 {
4023 /*
4024 * Big Page.
4025 */
4026 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4027 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4028 {
4029 if (PdeDst.n.u1Write)
4030 {
4031 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4032 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4033 cErrors++;
4034 continue;
4035 }
4036 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4037 {
4038 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4039 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4040 cErrors++;
4041 continue;
4042 }
4043# if 0 /** @todo sync access bit properly... */
4044 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4045 {
4046 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4047 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4048 cErrors++;
4049 }
4050 fIgnoreFlags |= X86_PTE_RW;
4051# else
4052 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4053# endif
4054 }
4055 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4056 {
4057 /* access bit emulation (not implemented). */
4058 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4059 {
4060 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4061 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4062 cErrors++;
4063 continue;
4064 }
4065 if (!PdeDst.n.u1Accessed)
4066 {
4067 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4068 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4069 cErrors++;
4070 }
4071 fIgnoreFlags |= X86_PTE_P;
4072 }
4073
4074 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4075 {
4076 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4077 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4078 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4079 cErrors++;
4080 }
4081
4082 /* iterate the page table. */
4083 for (unsigned iPT = 0, off = 0;
4084 iPT < RT_ELEMENTS(pPTDst->a);
4085 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4086 {
4087 const SHWPTE PteDst = pPTDst->a[iPT];
4088
4089 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4090 {
4091 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4092 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4093 cErrors++;
4094 }
4095
4096 /* skip not-present entries. */
4097 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4098 continue;
4099
4100 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4101
4102 /* match the physical addresses */
4103 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4104
4105# ifdef IN_RING3
4106 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4107 if (RT_FAILURE(rc))
4108 {
4109 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4110 {
4111 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4112 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4113 cErrors++;
4114 }
4115 }
4116 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4117 {
4118 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4119 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4120 cErrors++;
4121 continue;
4122 }
4123# endif
4124 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4125 if (!pPhysPage)
4126 {
4127# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4128 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4129 {
4130 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4131 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4132 cErrors++;
4133 continue;
4134 }
4135# endif
4136 if (PteDst.n.u1Write)
4137 {
4138 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4139 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4140 cErrors++;
4141 }
4142 fIgnoreFlags |= X86_PTE_RW;
4143 }
4144 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4145 {
4146 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4147 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4148 cErrors++;
4149 continue;
4150 }
4151
4152 /* flags */
4153 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4154 {
4155 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4156 {
4157 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4158 {
4159 if (PteDst.n.u1Write)
4160 {
4161 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4162 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4163 cErrors++;
4164 continue;
4165 }
4166 fIgnoreFlags |= X86_PTE_RW;
4167 }
4168 }
4169 else
4170 {
4171 if (PteDst.n.u1Present)
4172 {
4173 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4174 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4175 cErrors++;
4176 continue;
4177 }
4178 fIgnoreFlags |= X86_PTE_P;
4179 }
4180 }
4181
4182 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4183 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4184 )
4185 {
4186 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4187 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4188 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4189 cErrors++;
4190 continue;
4191 }
4192 } /* for each PTE */
4193 }
4194 }
4195 /* not present */
4196
4197 } /* for each PDE */
4198
4199 } /* for each PDPTE */
4200
4201 } /* for each PML4E */
4202
4203# ifdef DEBUG
4204 if (cErrors)
4205 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4206# endif
4207
4208#endif /* GST == 32BIT, PAE or AMD64 */
4209 return cErrors;
4210
4211#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4212}
4213#endif /* VBOX_STRICT */
4214
4215
4216/**
4217 * Sets up the CR3 for shadow paging
4218 *
4219 * @returns Strict VBox status code.
4220 * @retval VINF_SUCCESS.
4221 *
4222 * @param pVCpu The VMCPU handle.
4223 * @param GCPhysCR3 The physical address in the CR3 register.
4224 */
4225PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4226{
4227 PVM pVM = pVCpu->CTX_SUFF(pVM);
4228
4229 /* Update guest paging info. */
4230#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4231 || PGM_GST_TYPE == PGM_TYPE_PAE \
4232 || PGM_GST_TYPE == PGM_TYPE_AMD64
4233
4234 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4235
4236 /*
4237 * Map the page CR3 points at.
4238 */
4239 RTHCPTR HCPtrGuestCR3;
4240 RTHCPHYS HCPhysGuestCR3;
4241 pgmLock(pVM);
4242 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4243 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4244 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4245 /** @todo this needs some reworking wrt. locking. */
4246# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4247 HCPtrGuestCR3 = NIL_RTHCPTR;
4248 int rc = VINF_SUCCESS;
4249# else
4250 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4251# endif
4252 pgmUnlock(pVM);
4253 if (RT_SUCCESS(rc))
4254 {
4255 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4256 if (RT_SUCCESS(rc))
4257 {
4258# ifdef IN_RC
4259 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4260# endif
4261# if PGM_GST_TYPE == PGM_TYPE_32BIT
4262 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4263# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4264 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4265# endif
4266 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4267
4268# elif PGM_GST_TYPE == PGM_TYPE_PAE
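 /* The PAE PDPT only has to be 32-byte aligned, so CR3 may point into the
    middle of a page; 'off' below keeps that sub-page offset so the RC mapping
    points at the PDPT itself rather than at the start of its page. */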
4269 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4270 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4271# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4272 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4273# endif
4274 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4275 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4276
4277 /*
4278 * Map the 4 PDs too.
4279 */
4280 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4281 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4282 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4283 {
4284 if (pGuestPDPT->a[i].n.u1Present)
4285 {
4286 RTHCPTR HCPtr;
4287 RTHCPHYS HCPhys;
4288 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4289 pgmLock(pVM);
4290 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4291 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4292 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4293# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4294 HCPtr = NIL_RTHCPTR;
4295 int rc2 = VINF_SUCCESS;
4296# else
4297 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4298# endif
4299 pgmUnlock(pVM);
4300 if (RT_SUCCESS(rc2))
4301 {
4302 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4303 AssertRCReturn(rc, rc);
4304
4305 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4306# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4307 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4308# endif
4309 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4310 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4311# ifdef IN_RC
4312 PGM_INVL_PG(pVCpu, GCPtr);
4313# endif
4314 continue;
4315 }
4316 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4317 }
4318
4319 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4320# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4321 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4322# endif
4323 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4324 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4325# ifdef IN_RC
4326 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4327# endif
4328 }
4329
4330# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4331 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4332# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4333 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4334# endif
4335# endif
4336 }
4337 else
4338 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4339 }
4340 else
4341 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4342
4343#else /* prot/real stub */
4344 int rc = VINF_SUCCESS;
4345#endif
4346
4347 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4348# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4349 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4350 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4351 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4352 && PGM_GST_TYPE != PGM_TYPE_PROT))
4353
4354 Assert(!HWACCMIsNestedPagingActive(pVM));
4355
4356 /*
4357 * Update the shadow root page as well since that's not fixed.
4358 */
4359 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4360 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4361 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4362 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4363 PPGMPOOLPAGE pNewShwPageCR3;
4364
4365 pgmLock(pVM);
4366
4367 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4368 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4369 AssertFatalRC(rc);
4370 rc = VINF_SUCCESS;
4371
4372# ifdef IN_RC
4373 /*
4374 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4375 * state will be inconsistent! Flush important things now while
4376 * we still can and then make sure there are no ring-3 calls.
4377 */
4378 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4379 VMMRZCallRing3Disable(pVCpu);
4380# endif
4381
4382 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4383 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4384 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4385# ifdef IN_RING0
4386 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4387 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4388# elif defined(IN_RC)
4389 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4390 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4391# else
4392 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4393 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4394# endif
4395
4396# ifndef PGM_WITHOUT_MAPPINGS
4397 /*
4398 * Apply all hypervisor mappings to the new CR3.
4399 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4400 * make sure we check for conflicts in the new CR3 root.
4401 */
4402# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4403 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4404# endif
4405 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4406 AssertRCReturn(rc, rc);
4407# endif
4408
4409 /* Set the current hypervisor CR3. */
4410 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4411 SELMShadowCR3Changed(pVM, pVCpu);
4412
4413# ifdef IN_RC
4414 /* NOTE: The state is consistent again. */
4415 VMMRZCallRing3Enable(pVCpu);
4416# endif
4417
4418 /* Clean up the old CR3 root. */
4419 if (pOldShwPageCR3)
4420 {
4421 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4422# ifndef PGM_WITHOUT_MAPPINGS
4423 /* Remove the hypervisor mappings from the shadow page table. */
4424 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4425# endif
4426 /* Mark the page as unlocked; allow flushing again. */
4427 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4428
4429 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4430 }
4431 pgmUnlock(pVM);
4432# endif
4433
4434 return rc;
4435}
4436
4437/**
4438 * Unmaps the shadow CR3.
4439 *
4440 * @returns VBox status, no specials.
4441 * @param pVCpu The VMCPU handle.
4442 */
4443PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4444{
4445 LogFlow(("UnmapCR3\n"));
4446
4447 int rc = VINF_SUCCESS;
4448 PVM pVM = pVCpu->CTX_SUFF(pVM);
4449
4450 /*
4451 * Update guest paging info.
4452 */
4453#if PGM_GST_TYPE == PGM_TYPE_32BIT
4454 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4455# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4456 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4457# endif
4458 pVCpu->pgm.s.pGst32BitPdRC = 0;
4459
4460#elif PGM_GST_TYPE == PGM_TYPE_PAE
4461 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4462# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4463 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4464# endif
4465 pVCpu->pgm.s.pGstPaePdptRC = 0;
4466 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4467 {
4468 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4469# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4470 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4471# endif
4472 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4473 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4474 }
4475
4476#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4477 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4478# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4479 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4480# endif
4481
4482#else /* prot/real mode stub */
4483 /* nothing to do */
4484#endif
4485
4486#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4487 /*
4488 * Update shadow paging info.
4489 */
4490# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4491 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4492 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4493
4494# if PGM_GST_TYPE != PGM_TYPE_REAL
4495 Assert(!HWACCMIsNestedPagingActive(pVM));
4496# endif
4497
4498 pgmLock(pVM);
4499
4500# ifndef PGM_WITHOUT_MAPPINGS
4501 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4502 /* Remove the hypervisor mappings from the shadow page table. */
4503 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4504# endif
4505
4506 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4507 {
4508 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4509
4510 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4511
4512 /* Mark the page as unlocked; allow flushing again. */
4513 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4514
4515 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4516 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4517 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4518 pVCpu->pgm.s.pShwPageCR3RC = 0;
4519 pVCpu->pgm.s.iShwUser = 0;
4520 pVCpu->pgm.s.iShwUserTable = 0;
4521 }
4522 pgmUnlock(pVM);
4523# endif
4524#endif /* !IN_RC*/
4525
4526 return rc;
4527}
4528