VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@17517

Last change on this file since 17517 was 17509, checked in by vboxsync, 16 years ago

PGM: Moved the page pool PT flushing code in the access handler bits to where it belongs and called it pgmPoolTrackFlushGCPhys. Fixed a status code corruption bug in PGMR3PhysTlbGCPhys2Ptr (new phys). Made the lazy zero page replacement code work in the new code; it's disabled by default because it frequently requires flushing the shadow page pool, since the tracking code assumes the HCPhys of a PGMPAGE is unique and never shared.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 212.9 KB
Line 
1/* $Id: PGMAllBth.h 17509 2009-03-07 01:30:23Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVM pVM);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
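/*
 * Informal summary of the legal guest/shadow combinations enforced by the
 * #error checks above (derived here for readability; not part of the
 * original file):
 *
 *      Guest \ Shadow | 32BIT  PAE  AMD64  NESTED  EPT
 *      ---------------+-------------------------------
 *      REAL           |   x     x     -      x      x
 *      PROT           |   x     x     x      x      x
 *      32BIT          |   x     x     -      x      x
 *      PAE            |   -     x     -      x      x
 *      AMD64          |   -     -     x      x      x
 */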
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 * @param pVM VM Handle.
78 * @param uErr The trap error code.
79 * @param pRegFrame Trap register frame.
80 * @param pvFault The fault address.
81 */
82PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
83{
84# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && defined(VBOX_STRICT)
85 PGMDynCheckLocks(pVM);
86# endif
87
88# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
89 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
90 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
91
92# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
93 /*
94 * Hide the instruction fetch trap indicator for now.
95 */
96 /** @todo NXE will change this and we must fix NXE in the switcher too! */
97 if (uErr & X86_TRAP_PF_ID)
98 {
99 uErr &= ~X86_TRAP_PF_ID;
100 TRPMSetErrorCode(pVM, uErr);
101 }
102# endif
103
104 /*
105 * Get PDs.
106 */
107 int rc;
108# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
109# if PGM_GST_TYPE == PGM_TYPE_32BIT
110 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
111 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
112
113# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
114
115# if PGM_GST_TYPE == PGM_TYPE_PAE
116 unsigned iPDSrc;
117# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120# else
121 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, NULL);
122# endif
123
124# elif PGM_GST_TYPE == PGM_TYPE_AMD64
125 unsigned iPDSrc;
126 PX86PML4E pPml4eSrc;
127 X86PDPE PdpeSrc;
128 PGSTPD pPDSrc;
129
130 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
131 Assert(pPml4eSrc);
132# endif
133
134 /* Quick check for a valid guest trap. (PAE & AMD64) */
135 if (!pPDSrc)
136 {
137# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
138 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
139# else
140 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
141# endif
142 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
143 TRPMSetErrorCode(pVM, uErr);
144 return VINF_EM_RAW_GUEST_TRAP;
145 }
146# endif
147
148# else /* !PGM_WITH_PAGING */
149 PGSTPD pPDSrc = NULL;
150 const unsigned iPDSrc = 0;
151# endif /* !PGM_WITH_PAGING */
152
153
154# if PGM_SHW_TYPE == PGM_TYPE_32BIT
155 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
156 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
157
158# elif PGM_SHW_TYPE == PGM_TYPE_PAE
159 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
160
161# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
162 PX86PDPAE pPDDst;
163# if PGM_GST_TYPE != PGM_TYPE_PAE
164 X86PDPE PdpeSrc;
165
166 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
167 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
168# endif
169 rc = pgmShwSyncPaePDPtr(pVM, pvFault, &PdpeSrc, &pPDDst);
170 if (rc != VINF_SUCCESS)
171 {
172 AssertRC(rc);
173 return rc;
174 }
175 Assert(pPDDst);
176
177# else
178 PX86PDPAE pPDDst = pgmShwGetPaePDPtr(&pVM->pgm.s, pvFault);
179
180 /* Did we mark the PDPT as not present in SyncCR3? */
181 unsigned iPdpt = (pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
182 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
183 if (!pPdptDst->a[iPdpt].n.u1Present)
184 pPdptDst->a[iPdpt].n.u1Present = 1;
185# endif
186
187# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
188 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
189 PX86PDPAE pPDDst;
190# if PGM_GST_TYPE == PGM_TYPE_PROT
191 /* AMD-V nested paging */
192 X86PML4E Pml4eSrc;
193 X86PDPE PdpeSrc;
194 PX86PML4E pPml4eSrc = &Pml4eSrc;
195
196 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
197 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
198 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
199# endif
200
201 rc = pgmShwSyncLongModePDPtr(pVM, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
202 if (rc != VINF_SUCCESS)
203 {
204 AssertRC(rc);
205 return rc;
206 }
207 Assert(pPDDst);
208
209# elif PGM_SHW_TYPE == PGM_TYPE_EPT
210 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
211 PEPTPD pPDDst;
212
213 rc = pgmShwGetEPTPDPtr(pVM, pvFault, NULL, &pPDDst);
214 if (rc != VINF_SUCCESS)
215 {
216 AssertRC(rc);
217 return rc;
218 }
219 Assert(pPDDst);
220# endif
221
222# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
223 /*
224 * If we successfully correct the write protection fault due to dirty bit
225 * tracking, or this page fault is a genuine one, then return immediately.
226 */
227 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
228 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
229 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
230 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
231 || rc == VINF_EM_RAW_GUEST_TRAP)
232 {
233 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
234 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
235 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
236 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
237 }
238
239 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0ePD[iPDSrc]);
240# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
241
242 /*
243 * A common case is the not-present error caused by lazy page table syncing.
244 *
245 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
246 * so we can safely assume that the shadow PT is present when calling SyncPage later.
247 *
248 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
249 * of mapping conflict and defer to SyncCR3 in R3.
250 * (Again, we do NOT support access handlers for non-present guest pages.)
251 *
252 */
253# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
254 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
255# else
256 GSTPDE PdeSrc;
257 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
258 PdeSrc.n.u1Present = 1;
259 PdeSrc.n.u1Write = 1;
260 PdeSrc.n.u1Accessed = 1;
261 PdeSrc.n.u1User = 1;
262# endif
263 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
264 && !pPDDst->a[iPDDst].n.u1Present
265 && PdeSrc.n.u1Present
266 )
267
268 {
269 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2SyncPT; });
270 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
271 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
272 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, pvFault);
273 if (RT_SUCCESS(rc))
274 {
275 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
276 return rc;
277 }
278 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
279 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
280 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
281 return VINF_PGM_SYNC_CR3;
282 }
283
284# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
285 /*
286 * Check if this address is within any of our mappings.
287 *
288 * This is *very* fast and it's gonna save us a bit of effort below and prevent
289 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
290 * (BTW, it's impossible to have physical access handlers in a mapping.)
291 */
292 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
293 {
294 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
295 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
296 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
297 {
298 if (pvFault < pMapping->GCPtr)
299 break;
300 if (pvFault - pMapping->GCPtr < pMapping->cb)
301 {
302 /*
303 * The first thing we check is if we've got an undetected conflict.
304 */
305 if (!pVM->pgm.s.fMappingsFixed)
306 {
307 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
308 while (iPT-- > 0)
309 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
310 {
311 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eConflicts);
312 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
313 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
314 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
315 return VINF_PGM_SYNC_CR3;
316 }
317 }
318
319 /*
320 * Check if the fault address is in a virtual page access handler range.
321 */
322 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
323 if ( pCur
324 && pvFault - pCur->Core.Key < pCur->cb
325 && uErr & X86_TRAP_PF_RW)
326 {
327# ifdef IN_RC
328 STAM_PROFILE_START(&pCur->Stat, h);
329 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
330 STAM_PROFILE_STOP(&pCur->Stat, h);
331# else
332 AssertFailed();
333 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
334# endif
335 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersMapping);
336 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
337 return rc;
338 }
339
340 /*
341 * Pretend we're not here and let the guest handle the trap.
342 */
343 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
344 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFMapping);
345 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
346 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
347 return VINF_EM_RAW_GUEST_TRAP;
348 }
349 }
350 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
351 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
352# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
353
354 /*
355 * Check if this fault address is flagged for special treatment,
356 * which means we'll have to figure out the physical address and
357 * check flags associated with it.
358 *
359 * ASSUME that we can limit any special access handling to pages
360 * in page tables which the guest believes to be present.
361 */
362 if (PdeSrc.n.u1Present)
363 {
364 RTGCPHYS GCPhys = NIL_RTGCPHYS;
365
366# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
367# if PGM_GST_TYPE == PGM_TYPE_AMD64
368 bool fBigPagesSupported = true;
369# else
370 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
371# endif
372 if ( PdeSrc.b.u1Size
373 && fBigPagesSupported)
374 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
375 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
376 else
377 {
378 PGSTPT pPTSrc;
379 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
380 if (RT_SUCCESS(rc))
381 {
382 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
383 if (pPTSrc->a[iPTESrc].n.u1Present)
384 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
385 }
386 }
387# else
388 /* No paging so the fault address is the physical address */
389 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
390# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
391
392 /*
393 * If we have a GC address we'll check if it has any flags set.
394 */
395 if (GCPhys != NIL_RTGCPHYS)
396 {
397 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
398
399 PPGMPAGE pPage;
400 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
401 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
402 {
403 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
404 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
405 {
406 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
407 {
408 /*
409 * Physical page access handler.
410 */
411 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
412 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
413 if (pCur)
414 {
415# ifdef PGM_SYNC_N_PAGES
416 /*
417 * If the region is write protected and we got a page not present fault, then sync
418 * the pages. If the fault was caused by a read, then restart the instruction.
419 * In case of write access continue to the GC write handler.
420 *
421 * ASSUMES that there is only one handler per page or that they have similar write properties.
422 */
423 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
424 && !(uErr & X86_TRAP_PF_P))
425 {
426 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
427 if ( RT_FAILURE(rc)
428 || !(uErr & X86_TRAP_PF_RW)
429 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
430 {
431 AssertRC(rc);
432 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
433 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
434 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
435 return rc;
436 }
437 }
438# endif
439
440 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
441 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
442 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
443
444# if defined(IN_RC) || defined(IN_RING0)
445 if (pCur->CTX_SUFF(pfnHandler))
446 {
447 STAM_PROFILE_START(&pCur->Stat, h);
448 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
449 STAM_PROFILE_STOP(&pCur->Stat, h);
450 }
451 else
452# endif
453 rc = VINF_EM_RAW_EMULATE_INSTR;
454 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersPhysical);
455 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
456 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndPhys; });
457 return rc;
458 }
459 }
460# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
461 else
462 {
463# ifdef PGM_SYNC_N_PAGES
464 /*
465 * If the region is write protected and we got a page not present fault, then sync
466 * the pages. If the fault was caused by a read, then restart the instruction.
467 * In case of write access continue to the GC write handler.
468 */
469 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
470 && !(uErr & X86_TRAP_PF_P))
471 {
472 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
473 if ( RT_FAILURE(rc)
474 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
475 || !(uErr & X86_TRAP_PF_RW))
476 {
477 AssertRC(rc);
478 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
479 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
480 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
481 return rc;
482 }
483 }
484# endif
485 /*
486 * Ok, it's a virtual page access handler.
487 *
488 * Since it's faster to search by address, we'll do that first
489 * and then retry by GCPhys if that fails.
490 */
491 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
492 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
493 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
494 */
495 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
496 if (pCur)
497 {
498 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
499 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
500 || !(uErr & X86_TRAP_PF_P)
501 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
502 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
503
504 if ( pvFault - pCur->Core.Key < pCur->cb
505 && ( uErr & X86_TRAP_PF_RW
506 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
507 {
508# ifdef IN_RC
509 STAM_PROFILE_START(&pCur->Stat, h);
510 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
511 STAM_PROFILE_STOP(&pCur->Stat, h);
512# else
513 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
514# endif
515 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtual);
516 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
517 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
518 return rc;
519 }
520 /* Unhandled part of a monitored page */
521 }
522 else
523 {
524 /* Check by physical address. */
525 PPGMVIRTHANDLER pCur;
526 unsigned iPage;
527 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
528 &pCur, &iPage);
529 Assert(RT_SUCCESS(rc) || !pCur);
530 if ( pCur
531 && ( uErr & X86_TRAP_PF_RW
532 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
533 {
534 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
535# ifdef IN_RC
536 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
537 Assert(off < pCur->cb);
538 STAM_PROFILE_START(&pCur->Stat, h);
539 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
540 STAM_PROFILE_STOP(&pCur->Stat, h);
541# else
542 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
543# endif
544 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
545 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
546 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
547 return rc;
548 }
549 }
550 }
551# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
552
553 /*
554 * There is a handled area of the page, but this fault doesn't belong to it.
555 * We must emulate the instruction.
556 *
557 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
558 * we first check if this was a page-not-present fault for a page with only
559 * write access handlers. Restart the instruction if it wasn't a write access.
560 */
561 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersUnhandled);
562
563 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
564 && !(uErr & X86_TRAP_PF_P))
565 {
566 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
567 if ( RT_FAILURE(rc)
568 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
569 || !(uErr & X86_TRAP_PF_RW))
570 {
571 AssertRC(rc);
572 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
573 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
574 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
575 return rc;
576 }
577 }
578
579 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
580 * It's writing to an unhandled part of the LDT page several million times.
581 */
582 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
583 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
584 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
585 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndUnhandled; });
586 return rc;
587 } /* if any kind of handler */
588
589# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
590 if (uErr & X86_TRAP_PF_P)
591 {
592 /*
593 * The page isn't marked, but it might still be monitored by a virtual page access handler.
594 * (ASSUMES no temporary disabling of virtual handlers.)
595 */
596 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
597 * we should correct both the shadow page table and physical memory flags, and not only check for
598 * accesses within the handler region but for access to pages with virtual handlers. */
599 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
600 if (pCur)
601 {
602 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
603 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
604 || !(uErr & X86_TRAP_PF_P)
605 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
606 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
607
608 if ( pvFault - pCur->Core.Key < pCur->cb
609 && ( uErr & X86_TRAP_PF_RW
610 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
611 {
612# ifdef IN_RC
613 STAM_PROFILE_START(&pCur->Stat, h);
614 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
615 STAM_PROFILE_STOP(&pCur->Stat, h);
616# else
617 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
618# endif
619 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
620 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
621 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
622 return rc;
623 }
624 }
625 }
626# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
627 }
628 else
629 {
630 /*
631 * When the guest accesses invalid physical memory (e.g. probing
632 * of RAM or accessing a remapped MMIO range), then we'll fall
633 * back to the recompiler to emulate the instruction.
634 */
635 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
636 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersInvalid);
637 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
638 return VINF_EM_RAW_EMULATE_INSTR;
639 }
640
641 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
642
643# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
644 /*
645 * We are here only if the page is present in the guest page tables and
646 * the trap is not handled by our handlers.
647 *
648 * Check it for page out-of-sync situation.
649 */
650 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
651
652 if (!(uErr & X86_TRAP_PF_P))
653 {
654 /*
655 * Page is not present in our page tables.
656 * Try to sync it!
657 * BTW, fPageShw is invalid in this branch!
658 */
659 if (uErr & X86_TRAP_PF_US)
660 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
661 else /* supervisor */
662 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
663
664# if defined(LOG_ENABLED) && !defined(IN_RING0)
665 RTGCPHYS GCPhys;
666 uint64_t fPageGst;
667 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
668 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
669 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
670# endif /* LOG_ENABLED */
671
672# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
673 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
674 {
675 uint64_t fPageGst;
676 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
677 if ( RT_SUCCESS(rc)
678 && !(fPageGst & X86_PTE_US))
679 {
680 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
681 if ( pvFault == (RTGCPTR)pRegFrame->eip
682 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
683# ifdef CSAM_DETECT_NEW_CODE_PAGES
684 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
685 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
686# endif /* CSAM_DETECT_NEW_CODE_PAGES */
687 )
688 {
689 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
690 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
691 if (rc != VINF_SUCCESS)
692 {
693 /*
694 * CSAM needs to perform a job in ring 3.
695 *
696 * Sync the page before going to the host context; otherwise we'll end up in a loop if
697 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
698 */
699 LogFlow(("CSAM ring 3 job\n"));
700 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
701 AssertRC(rc2);
702
703 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
704 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2CSAM; });
705 return rc;
706 }
707 }
708# ifdef CSAM_DETECT_NEW_CODE_PAGES
709 else if ( uErr == X86_TRAP_PF_RW
710 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
711 && pRegFrame->ecx < 0x10000)
712 {
713 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
714 * to detect loading of new code pages.
715 */
716
717 /*
718 * Decode the instruction.
719 */
720 RTGCPTR PC;
721 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
722 if (rc == VINF_SUCCESS)
723 {
724 DISCPUSTATE Cpu;
725 uint32_t cbOp;
726 rc = EMInterpretDisasOneEx(pVM, PC, pRegFrame, &Cpu, &cbOp);
727
728 /* For now we'll restrict this to rep movsw/d instructions */
729 if ( rc == VINF_SUCCESS
730 && Cpu.pCurInstr->opcode == OP_MOVSWD
731 && (Cpu.prefix & PREFIX_REP))
732 {
733 CSAMMarkPossibleCodePage(pVM, pvFault);
734 }
735 }
736 }
737# endif /* CSAM_DETECT_NEW_CODE_PAGES */
738
739 /*
740 * Mark this page as safe.
741 */
742 /** @todo not correct for pages that contain both code and data!! */
743 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
744 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
745 }
746 }
747# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
748 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
749 if (RT_SUCCESS(rc))
750 {
751 /* The page was successfully synced, return to the guest. */
752 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
753 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSync; });
754 return VINF_SUCCESS;
755 }
756 }
757 else /* uErr & X86_TRAP_PF_P: */
758 {
759 /*
760 * Write protected pages are made writable when the guest makes the first
761 * write to them. This happens for pages that are shared, write monitored
762 * and not yet allocated.
763 *
764 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
765 * to physically monitored regions that are no longer valid.
766 * Assume for now it only applies to the read/write flag.
767 */
768 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
769 {
770# ifdef VBOX_WITH_NEW_PHYS_CODE
771 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
772 {
773 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
774 GCPhys, pPage, pvFault, uErr));
775 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
776 if (rc != VINF_SUCCESS)
777 {
778 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
779 return rc;
780 }
781 }
782 /// @todo count the above case; else
783# endif /* VBOX_WITH_NEW_PHYS_CODE */
784 if (uErr & X86_TRAP_PF_US)
785 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
786 else /* supervisor */
787 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
788
789 /*
790 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
791 * page is not present, which is not true in this case.
792 */
793 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
794 if (RT_SUCCESS(rc))
795 {
796 /*
797 * Page was successfully synced, return to guest.
798 */
799# ifdef VBOX_STRICT
800 RTGCPHYS GCPhys;
801 uint64_t fPageGst;
802 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
803 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
804 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
805
806 uint64_t fPageShw;
807 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
808 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
809# endif /* VBOX_STRICT */
810 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
811 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
812 return VINF_SUCCESS;
813 }
814
815 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
816 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
817 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
818 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
819 {
820 uint64_t fPageGst;
821 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
822 if ( RT_SUCCESS(rc)
823 && !(fPageGst & X86_PTE_RW))
824 {
825 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
826 if (RT_SUCCESS(rc))
827 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulInRZ);
828 else
829 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulToR3);
830 return rc;
831 }
832 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
833 }
834 }
835
836# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
837# ifdef VBOX_STRICT
838 /*
839 * Check for VMM page flags vs. Guest page flags consistency.
840 * Currently only for debug purposes.
841 */
842 if (RT_SUCCESS(rc))
843 {
844 /* Get guest page flags. */
845 uint64_t fPageGst;
846 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
847 if (RT_SUCCESS(rc))
848 {
849 uint64_t fPageShw;
850 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
851
852 /*
853 * Compare page flags.
854 * Note: we have AVL, A, D bits desynched.
855 */
856 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
857 ("Page flags mismatch! pvFault=%RGv GCPhys=%RGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
858 }
859 else
860 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
861 }
862 else
863 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
864# endif /* VBOX_STRICT */
865# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
866 }
867 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
868# endif /* PGM_OUT_OF_SYNC_IN_GC */
869 }
870 else /* GCPhys == NIL_RTGCPHYS */
871 {
872 /*
873 * Page not present in Guest OS or invalid page table address.
874 * This is potential virtual page access handler food.
875 *
876 * For the present we'll say that our access handlers don't
877 * work for this case - we've already discarded the page table
878 * not present case which is identical to this.
879 *
880 * When we perchance find we need this, we will probably have AVL
881 * trees (offset based) to operate on and we can measure their speed
882 * against mapping a page table and probably rearrange this handling
883 * a bit. (Like, searching virtual ranges before checking the
884 * physical address.)
885 */
886 }
887 }
888 /* else: !present (guest) */
889
890
891# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
892 /*
893 * Conclusion, this is a guest trap.
894 */
895 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
896 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFUnh);
897 return VINF_EM_RAW_GUEST_TRAP;
898# else
899 /* present, but not a monitored page; perhaps the guest is probing physical memory */
900 return VINF_EM_RAW_EMULATE_INSTR;
901# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
902
903
904# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
905
906 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
907 return VERR_INTERNAL_ERROR;
908# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
909}
910#endif /* !IN_RING3 */
911
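/*
 * Illustrative sketch (not part of the original file): a hypothetical RC/R0
 * caller is assumed to consume the Trap0eHandler status codes roughly like
 * this; only status codes the function above actually returns are used.
 *
 *     int rcTrap = PGM_BTH_NAME(Trap0eHandler)(pVM, uErr, pRegFrame, pvFault);
 *     if (rcTrap == VINF_SUCCESS)
 *         ...                resume the guest, the fault was fixed up
 *     else if (rcTrap == VINF_EM_RAW_GUEST_TRAP)
 *         ...                reflect the #PF to the guest (error code set via TRPMSetErrorCode)
 *     else if (rcTrap == VINF_PGM_SYNC_CR3)
 *         ...                go to ring-3; VM_FF_PGM_SYNC_CR3 is pending
 *     else
 *         ...                e.g. VINF_EM_RAW_EMULATE_INSTR: hand the instruction to the emulator
 */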
912
913/**
914 * Emulation of the invlpg instruction.
915 *
916 *
917 * @returns VBox status code.
918 *
919 * @param pVM VM handle.
920 * @param GCPtrPage Page to invalidate.
921 *
922 * @remark ASSUMES that the guest is updating before invalidating. This order
923 * isn't required by the CPU, so this is speculative and could cause
924 * trouble.
925 *
926 * @todo Flush page or page directory only if necessary!
927 * @todo Add a #define for simply invalidating the page.
928 */
929PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage)
930{
931#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
932 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
933 && PGM_SHW_TYPE != PGM_TYPE_EPT
934 int rc;
935
936 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
937 /*
938 * Get the shadow PD entry and skip out if this PD isn't present.
939 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
940 */
941# if PGM_SHW_TYPE == PGM_TYPE_32BIT
942 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
943 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
944
945# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
946 /* Fetch the pgm pool shadow descriptor. */
947 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
948 Assert(pShwPde);
949# endif
950
951# elif PGM_SHW_TYPE == PGM_TYPE_PAE
952 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
953 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
954
955 /* If the shadow PDPE isn't present, then skip the invalidate. */
956 if (!pPdptDst->a[iPdpt].n.u1Present)
957 {
958 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
959 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
960 return VINF_SUCCESS;
961 }
962
963# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
964 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
965 PPGMPOOLPAGE pShwPde;
966 PX86PDPAE pPDDst;
967
968 /* Fetch the pgm pool shadow descriptor. */
969 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
970 AssertRCSuccessReturn(rc, rc);
971 Assert(pShwPde);
972
973 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
974 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
975# else
976 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - pool index only atm! */;
977 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
978# endif
979
980# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
981 /* PML4 */
982# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
983 AssertReturn(pVM->pgm.s.pShwRootR3, VERR_INTERNAL_ERROR);
984# endif
985
986 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
987 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
988 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
989 PX86PDPAE pPDDst;
990 PX86PDPT pPdptDst;
991 PX86PML4E pPml4eDst;
992 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
993 if (rc != VINF_SUCCESS)
994 {
995 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
996 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
997 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
998 PGM_INVL_GUEST_TLBS();
999 return VINF_SUCCESS;
1000 }
1001 Assert(pPDDst);
1002
1003 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1004 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1005
1006 if (!pPdpeDst->n.u1Present)
1007 {
1008 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1009 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
1010 PGM_INVL_GUEST_TLBS();
1011 return VINF_SUCCESS;
1012 }
1013
1014# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1015
1016 const SHWPDE PdeDst = *pPdeDst;
1017 if (!PdeDst.n.u1Present)
1018 {
1019 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1020 return VINF_SUCCESS;
1021 }
1022
1023 /*
1024 * Get the guest PD entry and calc big page.
1025 */
1026# if PGM_GST_TYPE == PGM_TYPE_32BIT
1027 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
1028 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1029 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1030# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1031 unsigned iPDSrc;
1032# if PGM_GST_TYPE == PGM_TYPE_PAE
1033 X86PDPE PdpeSrc;
1034 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1035# else /* AMD64 */
1036 PX86PML4E pPml4eSrc;
1037 X86PDPE PdpeSrc;
1038 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1039# endif
1040 GSTPDE PdeSrc;
1041
1042 if (pPDSrc)
1043 PdeSrc = pPDSrc->a[iPDSrc];
1044 else
1045 PdeSrc.u = 0;
1046# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1047
1048# if PGM_GST_TYPE == PGM_TYPE_AMD64
1049 const bool fIsBigPage = PdeSrc.b.u1Size;
1050# else
1051 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1052# endif
1053
1054# ifdef IN_RING3
1055 /*
1056 * If a CR3 Sync is pending we may ignore the invalidate page operation
1057 * depending on the kind of sync and if it's a global page or not.
1058 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1059 */
1060# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1061 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
1062 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
1063 && fIsBigPage
1064 && PdeSrc.b.u1Global
1065 )
1066 )
1067# else
1068 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1069# endif
1070 {
1071 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1072 return VINF_SUCCESS;
1073 }
1074# endif /* IN_RING3 */
1075
1076# if PGM_GST_TYPE == PGM_TYPE_AMD64
1077 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1078
1079 /* Fetch the pgm pool shadow descriptor. */
1080 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
1081 Assert(pShwPdpt);
1082
1083 /* Fetch the pgm pool shadow descriptor. */
1084 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1085 Assert(pShwPde);
1086
1087 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1088 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1089
1090 if ( !pPml4eSrc->n.u1Present
1091 || pShwPdpt->GCPhys != GCPhysPdpt)
1092 {
1093 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1094 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1095 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1096 pPml4eDst->u = 0;
1097 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1098 PGM_INVL_GUEST_TLBS();
1099 return VINF_SUCCESS;
1100 }
1101 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1102 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1103 {
1104 /*
1105 * Mark not present so we can resync the PML4E when it's used.
1106 */
1107 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1108 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1109 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1110 pPml4eDst->u = 0;
1111 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1112 PGM_INVL_GUEST_TLBS();
1113 }
1114 else if (!pPml4eSrc->n.u1Accessed)
1115 {
1116 /*
1117 * Mark not present so we can set the accessed bit.
1118 */
1119 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1120 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1121 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1122 pPml4eDst->u = 0;
1123 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1124 PGM_INVL_GUEST_TLBS();
1125 }
1126
1127 /* Check if the PDPT entry has changed. */
1128 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1129 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1130 if ( !PdpeSrc.n.u1Present
1131 || pShwPde->GCPhys != GCPhysPd)
1132 {
1133 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1134 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1135 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1136 pPdpeDst->u = 0;
1137 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1138 PGM_INVL_GUEST_TLBS();
1139 return VINF_SUCCESS;
1140 }
1141 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1142 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1143 {
1144 /*
1145 * Mark not present so we can resync the PDPTE when it's used.
1146 */
1147 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1148 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1149 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1150 pPdpeDst->u = 0;
1151 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1152 PGM_INVL_GUEST_TLBS();
1153 }
1154 else if (!PdpeSrc.lm.u1Accessed)
1155 {
1156 /*
1157 * Mark not present so we can set the accessed bit.
1158 */
1159 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1160 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1161 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1162 pPdpeDst->u = 0;
1163 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1164 PGM_INVL_GUEST_TLBS();
1165 }
1166# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1167
1168# if PGM_GST_TYPE == PGM_TYPE_PAE && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1169 /*
1170 * Update the shadow PDPE and free all the shadow PD entries if the PDPE is marked not present.
1171 * Note: This shouldn't actually be necessary as we monitor the PDPT page for changes.
1172 */
1173 if (!pPDSrc)
1174 {
1175 /* Guest PDPE not present */
1176 PX86PDPAE pPDDst = pgmShwGetPaePDPtr(&pVM->pgm.s, GCPtrPage);
1177 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1178
1179 Assert(!PdpeSrc.n.u1Present);
1180 LogFlow(("InvalidatePage: guest PDPE %d not present; clear shw pdpe\n", iPdpt));
1181
1182 /* for each page directory entry */
1183 for (unsigned iPD = 0; iPD < X86_PG_PAE_ENTRIES; iPD++)
1184 {
1185 if ( pPDDst->a[iPD].n.u1Present
1186 && !(pPDDst->a[iPD].u & PGM_PDFLAGS_MAPPING))
1187 {
1188 pgmPoolFree(pVM, pPDDst->a[iPD].u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdpt * X86_PG_PAE_ENTRIES + iPD);
1189 pPDDst->a[iPD].u = 0;
1190 }
1191 }
1192 if (!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING))
1193 pPdptDst->a[iPdpt].n.u1Present = 0;
1194 PGM_INVL_GUEST_TLBS();
1195 }
1196 AssertMsg(pVM->pgm.s.fMappingsFixed || (PdpeSrc.u & X86_PDPE_PG_MASK) == pVM->pgm.s.aGCPhysGstPaePDsMonitored[iPdpt], ("%RGp vs %RGp (mon)\n", (PdpeSrc.u & X86_PDPE_PG_MASK), pVM->pgm.s.aGCPhysGstPaePDsMonitored[iPdpt]));
1197# endif
1198
1199
1200 /*
1201 * Deal with the Guest PDE.
1202 */
1203 rc = VINF_SUCCESS;
1204 if (PdeSrc.n.u1Present)
1205 {
1206 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1207 {
1208 /*
1209 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1210 */
1211 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1212 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1213 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1214 }
1215 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1216 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1217 {
1218 /*
1219 * Mark not present so we can resync the PDE when it's used.
1220 */
1221 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1222 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1223# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1224 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1225# else
1226 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1227# endif
1228 pPdeDst->u = 0;
1229 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1230 PGM_INVL_GUEST_TLBS();
1231 }
1232 else if (!PdeSrc.n.u1Accessed)
1233 {
1234 /*
1235 * Mark not present so we can set the accessed bit.
1236 */
1237 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1238 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1239# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1240 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1241# else
1242 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1243# endif
1244 pPdeDst->u = 0;
1245 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1246 PGM_INVL_GUEST_TLBS();
1247 }
1248 else if (!fIsBigPage)
1249 {
1250 /*
1251 * 4KB - page.
1252 */
1253 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1254 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1255# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1256 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1257 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1258# endif
1259 if (pShwPage->GCPhys == GCPhys)
1260 {
1261# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1262 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1263 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1264 if (pPT->a[iPTEDst].n.u1Present)
1265 {
1266# ifdef PGMPOOL_WITH_USER_TRACKING
1267 /* This is very unlikely with caching/monitoring enabled. */
1268 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1269# endif
1270 pPT->a[iPTEDst].u = 0;
1271 }
1272# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1273 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1274 if (RT_SUCCESS(rc))
1275 rc = VINF_SUCCESS;
1276# endif
1277 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1278 PGM_INVL_PG(GCPtrPage);
1279 }
1280 else
1281 {
1282 /*
1283 * The page table address changed.
1284 */
1285 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1286 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1287# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1288 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1289# else
1290 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1291# endif
1292 pPdeDst->u = 0;
1293 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1294 PGM_INVL_GUEST_TLBS();
1295 }
1296 }
1297 else
1298 {
1299 /*
1300 * 2/4MB - page.
1301 */
1302 /* Before freeing the page, check if anything really changed. */
1303 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1304 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1305# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1306 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1307 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1308# endif
1309 if ( pShwPage->GCPhys == GCPhys
1310 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1311 {
1312 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1313 /** @todo PAT */
1314 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1315 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1316 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1317 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1318 {
1319 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1320 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1321 return VINF_SUCCESS;
1322 }
1323 }
1324
1325 /*
1326 * Ok, the page table is present and it's been changed in the guest.
1327 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1328 * We could do this for some flushes in GC too, but we need an algorithm for
1329 * deciding which 4MB pages contain code likely to be executed very soon.
1330 */
1331 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1332 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1333# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1334 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1335# else
1336 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1337# endif
1338 pPdeDst->u = 0;
1339 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1340 PGM_INVL_BIG_PG(GCPtrPage);
1341 }
1342 }
1343 else
1344 {
1345 /*
1346 * Page directory is not present, mark shadow PDE not present.
1347 */
1348 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1349 {
1350# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1351 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1352# else
1353 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1354# endif
1355 pPdeDst->u = 0;
1356 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1357 PGM_INVL_PG(GCPtrPage);
1358 }
1359 else
1360 {
1361 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1362 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1363 }
1364 }
1365
1366 return rc;
1367
1368#else /* guest real and protected mode */
1369 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1370 return VINF_SUCCESS;
1371#endif
1372}
1373
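/*
 * Illustrative sketch (not part of the original file) of the ordering the
 * @remark on InvalidatePage assumes on the guest side; the names pGstPT,
 * iPte and uNewPte are hypothetical:
 *
 *     pGstPT->a[iPte].u = uNewPte;    guest updates its page table entry first,
 *     invlpg [GCPtrPage]              then invalidates; the intercept ends up in InvalidatePage above.
 *
 * If a guest invalidates before updating the entry, the shadow entry can be
 * resynced from the stale guest entry, which is the "trouble" the remark
 * warns about.
 */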
1374
1375#ifdef PGMPOOL_WITH_USER_TRACKING
1376/**
1377 * Update the tracking of shadowed pages.
1378 *
1379 * @param pVM The VM handle.
1380 * @param pShwPage The shadow page.
1381 * @param HCPhys The physical page that is being dereferenced.
1382 */
1383DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1384{
1385# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1386 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1387 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1388
1389 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1390 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1391 * 2. write protect all shadowed pages. I.e. implement caching.
1392 */
1393 /*
1394 * Find the guest address.
1395 */
1396 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1397 pRam;
1398 pRam = pRam->CTX_SUFF(pNext))
1399 {
1400 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1401 while (iPage-- > 0)
1402 {
1403 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1404 {
1405 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1406 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1407 pShwPage->cPresent--;
1408 pPool->cPresent--;
1409 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1410 return;
1411 }
1412 }
1413 }
1414
1415 for (;;)
1416 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1417# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1418 pShwPage->cPresent--;
1419 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1420# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1421}
1422
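/*
 * Illustrative sketch (not part of the original file): option 1 of the @todo
 * in SyncPageWorkerTrackDeref above (a medium sized HCPhys lookup TLB) could
 * look roughly like the direct-mapped cache below, here keyed to the PGMPAGE
 * pointer rather than the GCPhys.  All names (PGMHCPHYSTLBE, aHCPhysTlb,
 * iSlot) are hypothetical:
 *
 *     typedef struct PGMHCPHYSTLBE { RTHCPHYS HCPhys; PPGMPAGE pPage; } PGMHCPHYSTLBE;
 *     PGMHCPHYSTLBE aHCPhysTlb[64];
 *
 *     unsigned iSlot = (unsigned)(HCPhys >> PAGE_SHIFT) & 63;
 *     if (aHCPhysTlb[iSlot].HCPhys == HCPhys && aHCPhysTlb[iSlot].pPage)
 *         pPage = aHCPhysTlb[iSlot].pPage;     hit: skip the RAM range walk
 *     else
 *         ...                                  miss: do the walk and fill the slot
 */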
1423
1424/**
1425 * Update the tracking of shadowed pages.
1426 *
1427 * @param pVM The VM handle.
1428 * @param pShwPage The shadow page.
1429 * @param u16 The top 16 bits of pPage->HCPhys.
1430 * @param pPage Pointer to the guest page. This will be modified.
1431 * @param iPTDst The index into the shadow table.
1432 */
1433DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1434{
1435# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1436 /*
1437 * Just deal with the simple first time here.
1438 */
1439 if (!u16)
1440 {
1441 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1442 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1443 }
1444 else
1445 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1446
1447 /* write back */
1448 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1449 PGM_PAGE_SET_TRACKING(pPage, u16);
1450
1451# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1452
1453 /* update statistics. */
1454 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1455 pShwPage->cPresent++;
1456 if (pShwPage->iFirstPresent > iPTDst)
1457 pShwPage->iFirstPresent = iPTDst;
1458}
1459#endif /* PGMPOOL_WITH_USER_TRACKING */
1460
1461
1462/**
1463 * Creates a 4K shadow page for a guest page.
1464 *
1465 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1466 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1467 * will be mapped in this function.
1468 *
1469 * @param pVM VM handle.
1470 * @param pPteDst Destination page table entry.
1471 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1472 * Can safely assume that only the flags are being used.
1473 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1474 * @param pShwPage Pointer to the shadow page.
1475 * @param iPTDst The index into the shadow table.
1476 *
1477 * @remark Not used for 2/4MB pages!
1478 */
1479DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1480{
1481 if (PteSrc.n.u1Present)
1482 {
1483 /*
1484 * Find the ram range.
1485 */
1486 PPGMPAGE pPage;
1487 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1488 if (RT_SUCCESS(rc))
1489 {
1490#ifdef VBOX_WITH_NEW_PHYS_CODE
1491# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1492 /* Try to make the page writable if necessary. */
1493 if ( PteSrc.n.u1Write
1494 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1495 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1496 {
1497 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1498 AssertRC(rc);
1499 }
1500# endif
1501#endif
1502
1503 /** @todo investigate PWT, PCD and PAT. */
1504 /*
1505 * Make page table entry.
1506 */
1507 SHWPTE PteDst;
1508 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1509 {
1510 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1511 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1512 {
1513#if PGM_SHW_TYPE == PGM_TYPE_EPT
1514 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1515 PteDst.n.u1Present = 1;
1516 PteDst.n.u1Execute = 1;
1517 PteDst.n.u1IgnorePAT = 1;
1518 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1519 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1520#else
1521 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1522 | PGM_PAGE_GET_HCPHYS(pPage);
1523#endif
1524 }
1525 else
1526 {
1527 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1528 PteDst.u = 0;
1529 }
1530 /** @todo count these two kinds. */
1531 }
1532 else
1533 {
1534#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1535 /*
1536 * If the page or page directory entry is not marked accessed,
1537 * we mark the page not present.
1538 */
1539 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1540 {
1541 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1542 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1543 PteDst.u = 0;
1544 }
1545 else
1546 /*
1547 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1548 * when the page is modified.
1549 */
1550 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1551 {
1552 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1553 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1554 | PGM_PAGE_GET_HCPHYS(pPage)
1555 | PGM_PTFLAGS_TRACK_DIRTY;
1556 }
1557 else
1558#endif
1559 {
1560 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1561#if PGM_SHW_TYPE == PGM_TYPE_EPT
1562 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1563 PteDst.n.u1Present = 1;
1564 PteDst.n.u1Write = 1;
1565 PteDst.n.u1Execute = 1;
1566 PteDst.n.u1IgnorePAT = 1;
1567 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1568 /* PteDst.n.u1Size = 0 */
1569#else
1570 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1571 | PGM_PAGE_GET_HCPHYS(pPage);
1572#endif
1573 }
1574 }
1575
1576#ifdef VBOX_WITH_NEW_PHYS_CODE
1577 /*
1578 * Make sure only allocated pages are mapped writable.
1579 */
1580 if ( PteDst.n.u1Write
1581 && PteDst.n.u1Present
1582 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1583 {
1584 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1585 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1586 }
1587#endif
1588
1589#ifdef PGMPOOL_WITH_USER_TRACKING
1590 /*
1591 * Keep user track up to date.
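             * A newly present mapping takes a reference on the guest page; repointing or clearing
             * the shadow PTE drops the reference on the previously mapped page first.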
1592 */
1593 if (PteDst.n.u1Present)
1594 {
1595 if (!pPteDst->n.u1Present)
1596 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1597 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1598 {
1599 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1600 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1601 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1602 }
1603 }
1604 else if (pPteDst->n.u1Present)
1605 {
1606 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1607 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1608 }
1609#endif /* PGMPOOL_WITH_USER_TRACKING */
1610
1611 /*
1612 * Update statistics and commit the entry.
1613 */
1614#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1615 if (!PteSrc.n.u1Global)
1616 pShwPage->fSeenNonGlobal = true;
1617#endif
1618 *pPteDst = PteDst;
1619 }
1620 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1621 /** @todo count these. */
1622 }
1623 else
1624 {
1625 /*
1626 * Page not-present.
1627 */
1628 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1629#ifdef PGMPOOL_WITH_USER_TRACKING
1630 /* Keep user track up to date. */
1631 if (pPteDst->n.u1Present)
1632 {
1633 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1634 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1635 }
1636#endif /* PGMPOOL_WITH_USER_TRACKING */
1637 pPteDst->u = 0;
1638 /** @todo count these. */
1639 }
1640}
1641
1642
1643/**
1644 * Syncs a guest OS page.
1645 *
1646 * There are no conflicts at this point, nor is there any need for
1647 * page table allocations.
1648 *
1649 * @returns VBox status code.
1650 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1651 * @param pVM VM handle.
1652 * @param PdeSrc Page directory entry of the guest.
1653 * @param GCPtrPage Guest context page address.
1654 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1655 * @param uErr Fault error (X86_TRAP_PF_*).
1656 */
1657PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1658{
1659 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1660
1661#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1662 || PGM_GST_TYPE == PGM_TYPE_PAE \
1663 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1664 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1665 && PGM_SHW_TYPE != PGM_TYPE_EPT
1666
1667# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1668 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1669# endif
1670
1671 /*
1672 * Assert preconditions.
1673 */
1674 Assert(PdeSrc.n.u1Present);
1675 Assert(cPages);
1676 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1677
1678 /*
1679 * Get the shadow PDE, find the shadow page table in the pool.
1680 */
1681# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1682 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1683 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
1684
1685# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1686 /* Fetch the pgm pool shadow descriptor. */
1687 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
1688 Assert(pShwPde);
1689# endif
1690
1691# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1692
1693# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1694 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1695 PPGMPOOLPAGE pShwPde;
1696 PX86PDPAE pPDDst;
1697
1698 /* Fetch the pgm pool shadow descriptor. */
1699 int rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
1700 AssertRCSuccessReturn(rc, rc);
1701 Assert(pShwPde);
1702
1703 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1704 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1705# else
1706 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm! */;
1707 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1708 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s); NOREF(pPdptDst);
1709 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
1710 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR);
1711# endif
1712# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1713 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1714 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1715 PX86PDPAE pPDDst;
1716 PX86PDPT pPdptDst;
1717
1718 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1719 AssertRCSuccessReturn(rc, rc);
1720 Assert(pPDDst && pPdptDst);
1721 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1722# endif
1723
1724 SHWPDE PdeDst = *pPdeDst;
1725 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1726 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1727
1728# if PGM_GST_TYPE == PGM_TYPE_AMD64
1729 /* Fetch the pgm pool shadow descriptor. */
1730 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1731 Assert(pShwPde);
1732# endif
1733
1734 /*
1735 * Check that the page is present and that the shadow PDE isn't out of sync.
1736 */
1737# if PGM_GST_TYPE == PGM_TYPE_AMD64
1738 const bool fBigPage = PdeSrc.b.u1Size;
1739# else
1740 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1741# endif
1742 RTGCPHYS GCPhys;
1743 if (!fBigPage)
1744 {
1745 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1746# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1747 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1748 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1749# endif
1750 }
1751 else
1752 {
1753 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1754# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1755 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1756 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1757# endif
1758 }
1759 if ( pShwPage->GCPhys == GCPhys
1760 && PdeSrc.n.u1Present
1761 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1762 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1763# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1764 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1765# endif
1766 )
1767 {
1768 /*
1769 * Check that the PDE is marked accessed already.
1770 * Since we set the accessed bit *before* getting here on a #PF, this
1771 * check is only meant for dealing with non-#PF'ing paths.
1772 */
1773 if (PdeSrc.n.u1Accessed)
1774 {
1775 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1776 if (!fBigPage)
1777 {
1778 /*
1779 * 4KB Page - Map the guest page table.
1780 */
1781 PGSTPT pPTSrc;
1782 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1783 if (RT_SUCCESS(rc))
1784 {
1785# ifdef PGM_SYNC_N_PAGES
1786 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1787 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1788 {
1789 /*
1790 * This code path is currently only taken when the caller is PGMTrap0eHandler
1791 * for non-present pages!
1792 *
1793 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1794 * deal with locality.
1795 */
1796 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1797# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1798 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1799 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1800# else
1801 const unsigned offPTSrc = 0;
1802# endif
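                    /* Sync a window of PGM_SYNC_NR_PAGES entries centered on the faulting entry, clamped to the page table bounds. */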
1803 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1804 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1805 iPTDst = 0;
1806 else
1807 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1808 for (; iPTDst < iPTDstEnd; iPTDst++)
1809 {
1810 if (!pPTDst->a[iPTDst].n.u1Present)
1811 {
1812 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1813 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1814 NOREF(GCPtrCurPage);
1815#ifndef IN_RING0
1816 /*
1817 * Assuming kernel code will be marked as supervisor - and not as user level
1818 * and executed using a conforming code selector - and marked as readonly.
1819 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1820 */
1821 PPGMPAGE pPage;
1822 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1823 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1824 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1825 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1826 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1827 )
1828#endif /* else: CSAM not active */
1829 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1830 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1831 GCPtrCurPage, PteSrc.n.u1Present,
1832 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1833 PteSrc.n.u1User & PdeSrc.n.u1User,
1834 (uint64_t)PteSrc.u,
1835 (uint64_t)pPTDst->a[iPTDst].u,
1836 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1837 }
1838 }
1839 }
1840 else
1841# endif /* PGM_SYNC_N_PAGES */
1842 {
1843 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1844 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1845 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1846 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1847 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1848 GCPtrPage, PteSrc.n.u1Present,
1849 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1850 PteSrc.n.u1User & PdeSrc.n.u1User,
1851 (uint64_t)PteSrc.u,
1852 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1853 }
1854 }
1855 else /* MMIO or invalid page: emulated in #PF handler. */
1856 {
1857 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1858 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1859 }
1860 }
1861 else
1862 {
1863 /*
1864 * 4/2MB page - lazy syncing shadow 4K pages.
1865 * (There are many causes of getting here, it's no longer only CSAM.)
1866 */
1867 /* Calculate the GC physical address of this 4KB shadow page. */
1868 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1869 /* Find ram range. */
1870 PPGMPAGE pPage;
1871 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1872 if (RT_SUCCESS(rc))
1873 {
1874# ifdef VBOX_WITH_NEW_PHYS_CODE
1875# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1876 /* Try to make the page writable if necessary. */
1877 if ( PdeSrc.n.u1Write
1878 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1879 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1880 {
1881 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1882 AssertRC(rc);
1883 }
1884# endif
1885# endif
1886
1887 /*
1888 * Make shadow PTE entry.
1889 */
1890 SHWPTE PteDst;
1891 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1892 | PGM_PAGE_GET_HCPHYS(pPage);
1893 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1894 {
1895 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1896 PteDst.n.u1Write = 0;
1897 else
1898 PteDst.u = 0;
1899 }
1900 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1901# ifdef PGMPOOL_WITH_USER_TRACKING
1902 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1903 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1904# endif
1905# ifdef VBOX_WITH_NEW_PHYS_CODE
1906 /* Make sure only allocated pages are mapped writable. */
1907 if ( PteDst.n.u1Write
1908 && PteDst.n.u1Present
1909 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1910 {
1911 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1912 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1913 }
1914# endif
1915
1916 pPTDst->a[iPTDst] = PteDst;
1917
1918
1919 /*
1920 * If the page is not flagged as dirty and is writable, then make it read-only
1921 * at PD level, so we can set the dirty bit when the page is modified.
1922 *
1923 * ASSUMES that page access handlers are implemented on page table entry level.
1924 * Thus we will first catch the dirty access and set PDE.D and restart. If
1925 * there is an access handler, we'll trap again and let it work on the problem.
1926 */
1927 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1928 * As for invlpg, it simply frees the whole shadow PT.
1929 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1930 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1931 {
1932 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1933 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1934 PdeDst.n.u1Write = 0;
1935 }
1936 else
1937 {
1938 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1939 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1940 }
1941 *pPdeDst = PdeDst;
1942 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1943 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1944 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1945 }
1946 else
1947 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1948 }
1949 return VINF_SUCCESS;
1950 }
1951 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1952 }
1953 else
1954 {
1955 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1956 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1957 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1958 }
1959
1960 /*
1961 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1962 * Yea, I'm lazy.
1963 */
1964 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1965# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1966 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1967# else
1968 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPDDst);
1969# endif
1970
1971 pPdeDst->u = 0;
1972 PGM_INVL_GUEST_TLBS();
1973 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1974
1975#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1976 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1977 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
1978
1979# ifdef PGM_SYNC_N_PAGES
1980 /*
1981 * Get the shadow PDE, find the shadow page table in the pool.
1982 */
1983# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1984 X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
1985
1986# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1987 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
1988
1989# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1990 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1991 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1992 PX86PDPAE pPDDst;
1993 X86PDEPAE PdeDst;
1994 PX86PDPT pPdptDst;
1995
1996 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1997 AssertRCSuccessReturn(rc, rc);
1998 Assert(pPDDst && pPdptDst);
1999 PdeDst = pPDDst->a[iPDDst];
2000# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2001 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2002 PEPTPD pPDDst;
2003 EPTPDE PdeDst;
2004
2005 int rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, NULL, &pPDDst);
2006 if (rc != VINF_SUCCESS)
2007 {
2008 AssertRC(rc);
2009 return rc;
2010 }
2011 Assert(pPDDst);
2012 PdeDst = pPDDst->a[iPDDst];
2013# endif
2014 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
2015 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
2016 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2017
2018 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2019 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
2020 {
2021 /*
2022 * This code path is currently only taken when the caller is PGMTrap0eHandler
2023 * for non-present pages!
2024 *
2025 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2026 * deal with locality.
2027 */
2028 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2029 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2030 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2031 iPTDst = 0;
2032 else
2033 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2034 for (; iPTDst < iPTDstEnd; iPTDst++)
2035 {
2036 if (!pPTDst->a[iPTDst].n.u1Present)
2037 {
2038 GSTPTE PteSrc;
2039
2040 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2041
2042 /* Fake the page table entry */
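                /* No guest paging in this mode, so the guest-virtual address doubles as the guest-physical address. */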
2043 PteSrc.u = GCPtrCurPage;
2044 PteSrc.n.u1Present = 1;
2045 PteSrc.n.u1Dirty = 1;
2046 PteSrc.n.u1Accessed = 1;
2047 PteSrc.n.u1Write = 1;
2048 PteSrc.n.u1User = 1;
2049
2050 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2051
2052 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2053 GCPtrCurPage, PteSrc.n.u1Present,
2054 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2055 PteSrc.n.u1User & PdeSrc.n.u1User,
2056 (uint64_t)PteSrc.u,
2057 (uint64_t)pPTDst->a[iPTDst].u,
2058 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2059 }
2060 else
2061 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2062 }
2063 }
2064 else
2065# endif /* PGM_SYNC_N_PAGES */
2066 {
2067 GSTPTE PteSrc;
2068 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2069 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2070
2071 /* Fake the page table entry */
2072 PteSrc.u = GCPtrCurPage;
2073 PteSrc.n.u1Present = 1;
2074 PteSrc.n.u1Dirty = 1;
2075 PteSrc.n.u1Accessed = 1;
2076 PteSrc.n.u1Write = 1;
2077 PteSrc.n.u1User = 1;
2078 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2079
2080 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2081 GCPtrPage, PteSrc.n.u1Present,
2082 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2083 PteSrc.n.u1User & PdeSrc.n.u1User,
2084 (uint64_t)PteSrc.u,
2085 (uint64_t)pPTDst->a[iPTDst].u,
2086 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2087 }
2088 return VINF_SUCCESS;
2089
2090#else
2091 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2092 return VERR_INTERNAL_ERROR;
2093#endif
2094}
2095
2096
2097#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2098/**
2099 * Investigate page fault and handle write protection page faults caused by
2100 * dirty bit tracking.
2101 *
2102 * @returns VBox status code.
2103 * @param pVM VM handle.
2104 * @param uErr Page fault error code.
2105 * @param pPdeDst Shadow page directory entry.
2106 * @param pPdeSrc Guest page directory entry.
2107 * @param GCPtrPage Guest context page address.
2108 */
2109PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2110{
2111 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
2112 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2113 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2114# if PGM_GST_TYPE == PGM_TYPE_AMD64
2115 bool fBigPagesSupported = true;
2116# else
2117 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2118# endif
2119# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2120 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
2121# endif
2122 unsigned uPageFaultLevel;
2123 int rc;
2124
2125 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2126 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2127
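    /*
     * Walk the guest structures top-down (PML4E, PDPE, PDE, PTE). If the guest entries themselves
     * forbid the access, it is a genuine guest fault; otherwise the fault may be caused by our own
     * accessed/dirty-bit write protection, which is resolved here.
     */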
2128# if PGM_GST_TYPE == PGM_TYPE_PAE \
2129 || PGM_GST_TYPE == PGM_TYPE_AMD64
2130
2131# if PGM_GST_TYPE == PGM_TYPE_AMD64
2132 PX86PML4E pPml4eSrc;
2133 PX86PDPE pPdpeSrc;
2134
2135 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
2136 Assert(pPml4eSrc);
2137
2138 /*
2139 * Real page fault? (PML4E level)
2140 */
2141 if ( (uErr & X86_TRAP_PF_RSVD)
2142 || !pPml4eSrc->n.u1Present
2143 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2144 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2145 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2146 )
2147 {
2148 uPageFaultLevel = 0;
2149 goto l_UpperLevelPageFault;
2150 }
2151 Assert(pPdpeSrc);
2152
2153# else /* PAE */
2154 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVM->pgm.s, GCPtrPage);
2155# endif /* PAE */
2156
2157 /*
2158 * Real page fault? (PDPE level)
2159 */
2160 if ( (uErr & X86_TRAP_PF_RSVD)
2161 || !pPdpeSrc->n.u1Present
2162# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2163 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2164 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2165 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2166# endif
2167 )
2168 {
2169 uPageFaultLevel = 1;
2170 goto l_UpperLevelPageFault;
2171 }
2172# endif
2173
2174 /*
2175 * Real page fault? (PDE level)
2176 */
2177 if ( (uErr & X86_TRAP_PF_RSVD)
2178 || !pPdeSrc->n.u1Present
2179# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2180 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2181# endif
2182 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2183 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2184 {
2185 uPageFaultLevel = 2;
2186 goto l_UpperLevelPageFault;
2187 }
2188
2189 /*
2190 * First check the easy case where the page directory has been marked read-only to track
2191 * the dirty bit of an emulated BIG page.
2192 */
2193 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2194 {
2195 /* Mark guest page directory as accessed */
2196# if PGM_GST_TYPE == PGM_TYPE_AMD64
2197 pPml4eSrc->n.u1Accessed = 1;
2198 pPdpeSrc->lm.u1Accessed = 1;
2199# endif
2200 pPdeSrc->b.u1Accessed = 1;
2201
2202 /*
2203 * Only write protection page faults are relevant here.
2204 */
2205 if (fWriteFault)
2206 {
2207 /* Mark guest page directory as dirty (BIG page only). */
2208 pPdeSrc->b.u1Dirty = 1;
2209
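            /* A shadow PDE carrying PGM_PDFLAGS_TRACK_DIRTY was write-protected by us for dirty-bit
               tracking; restore write access now that the guest dirty bit has been set. */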
2210 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2211 {
2212 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2213
2214 Assert(pPdeSrc->b.u1Write);
2215
2216 pPdeDst->n.u1Write = 1;
2217 pPdeDst->n.u1Accessed = 1;
2218 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2219 PGM_INVL_BIG_PG(GCPtrPage);
2220 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2221 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2222 }
2223 }
2224 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2225 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2226 }
2227 /* else: 4KB page table */
2228
2229 /*
2230 * Map the guest page table.
2231 */
2232 PGSTPT pPTSrc;
2233 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2234 if (RT_SUCCESS(rc))
2235 {
2236 /*
2237 * Real page fault?
2238 */
2239 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2240 const GSTPTE PteSrc = *pPteSrc;
2241 if ( !PteSrc.n.u1Present
2242# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2243 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2244# endif
2245 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2246 || (fUserLevelFault && !PteSrc.n.u1User)
2247 )
2248 {
2249 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2250 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2251 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2252
2253 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2254 * See the 2nd case above as well.
2255 */
2256 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2257 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2258
2259 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2260 return VINF_EM_RAW_GUEST_TRAP;
2261 }
2262 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2263
2264 /*
2265 * Set the accessed bits in the page directory and the page table.
2266 */
2267# if PGM_GST_TYPE == PGM_TYPE_AMD64
2268 pPml4eSrc->n.u1Accessed = 1;
2269 pPdpeSrc->lm.u1Accessed = 1;
2270# endif
2271 pPdeSrc->n.u1Accessed = 1;
2272 pPteSrc->n.u1Accessed = 1;
2273
2274 /*
2275 * Only write protection page faults are relevant here.
2276 */
2277 if (fWriteFault)
2278 {
2279 /* Write access, so mark guest entry as dirty. */
2280# ifdef VBOX_WITH_STATISTICS
2281 if (!pPteSrc->n.u1Dirty)
2282 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2283 else
2284 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2285# endif
2286
2287 pPteSrc->n.u1Dirty = 1;
2288
2289 if (pPdeDst->n.u1Present)
2290 {
2291#ifndef IN_RING0
2292 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2293 * Our individual shadow handlers will provide more information and force a fatal exit.
2294 */
2295 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2296 {
2297 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2298 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2299 return VINF_SUCCESS;
2300 }
2301#endif
2302 /*
2303 * Map shadow page table.
2304 */
2305 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2306 if (pShwPage)
2307 {
2308 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2309 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
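                    /* A present shadow PTE with PGM_PTFLAGS_TRACK_DIRTY is one we write-protected ourselves;
                       grant write access again and mark it dirty and accessed. */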
2310 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2311 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2312 {
2313 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2314# ifdef VBOX_STRICT
2315 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2316 if (pPage)
2317 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2318 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2319# endif
2320 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2321
2322 Assert(pPteSrc->n.u1Write);
2323
2324 pPteDst->n.u1Write = 1;
2325 pPteDst->n.u1Dirty = 1;
2326 pPteDst->n.u1Accessed = 1;
2327 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2328 PGM_INVL_PG(GCPtrPage);
2329
2330 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2331 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2332 }
2333 }
2334 else
2335 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2336 }
2337 }
2338/** @todo Optimize accessed bit emulation? */
2339# ifdef VBOX_STRICT
2340 /*
2341 * Sanity check.
2342 */
2343 else if ( !pPteSrc->n.u1Dirty
2344 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2345 && pPdeDst->n.u1Present)
2346 {
2347 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2348 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2349 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2350 if ( pPteDst->n.u1Present
2351 && pPteDst->n.u1Write)
2352 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2353 }
2354# endif /* VBOX_STRICT */
2355 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2356 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2357 }
2358 AssertRC(rc);
2359 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2360 return rc;
2361
2362
2363l_UpperLevelPageFault:
2364 /*
2365 * Pagefault detected while checking the PML4E, PDPE or PDE.
2366 * Single exit handler to get rid of duplicate code paths.
2367 */
2368 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2369 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2370 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2371
2372 if (
2373# if PGM_GST_TYPE == PGM_TYPE_AMD64
2374 pPml4eSrc->n.u1Present &&
2375# endif
2376# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2377 pPdpeSrc->n.u1Present &&
2378# endif
2379 pPdeSrc->n.u1Present)
2380 {
2381 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2382 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2383 {
2384 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2385 }
2386 else
2387 {
2388 /*
2389 * Map the guest page table.
2390 */
2391 PGSTPT pPTSrc;
2392 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2393 if (RT_SUCCESS(rc))
2394 {
2395 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2396 const GSTPTE PteSrc = *pPteSrc;
2397 if (pPteSrc->n.u1Present)
2398 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2399 }
2400 AssertRC(rc);
2401 }
2402 }
2403 return VINF_EM_RAW_GUEST_TRAP;
2404}
2405#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2406
2407
2408/**
2409 * Sync a shadow page table.
2410 *
2411 * The shadow page table is not present. This includes the case where
2412 * there is a conflict with a mapping.
2413 *
2414 * @returns VBox status code.
2415 * @param pVM VM handle.
2416 * @param iPDSrc Page directory index.
2417 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2418 * Assume this is a temporary mapping.
2419 * @param GCPtrPage GC pointer of the page that caused the fault.
2420 */
2421PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2422{
2423 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2424 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPtPD[iPDSrc]);
2425 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2426
2427#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2428 || PGM_GST_TYPE == PGM_TYPE_PAE \
2429 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2430 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2431 && PGM_SHW_TYPE != PGM_TYPE_EPT
2432
2433 int rc = VINF_SUCCESS;
2434
2435 /*
2436 * Validate input a little bit.
2437 */
2438 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2439# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2440 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2441 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2442
2443# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2444 /* Fetch the pgm pool shadow descriptor. */
2445 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2446 Assert(pShwPde);
2447# endif
2448
2449# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2450# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2451 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2452 PPGMPOOLPAGE pShwPde;
2453 PX86PDPAE pPDDst;
2454 PSHWPDE pPdeDst;
2455
2456 /* Fetch the pgm pool shadow descriptor. */
2457 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2458 AssertRCSuccessReturn(rc, rc);
2459 Assert(pShwPde);
2460
2461 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2462 pPdeDst = &pPDDst->a[iPDDst];
2463# else
2464 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm! */;
2465 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT); NOREF(iPdpt);
2466 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s); NOREF(pPdptDst);
2467 PSHWPDE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
2468# endif
2469# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2470 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2471 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2472 PX86PDPAE pPDDst;
2473 PX86PDPT pPdptDst;
2474 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2475 AssertRCSuccessReturn(rc, rc);
2476 Assert(pPDDst);
2477 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2478# endif
2479 SHWPDE PdeDst = *pPdeDst;
2480
2481# if PGM_GST_TYPE == PGM_TYPE_AMD64
2482 /* Fetch the pgm pool shadow descriptor. */
2483 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2484 Assert(pShwPde);
2485# endif
2486
2487# ifndef PGM_WITHOUT_MAPPINGS
2488 /*
2489 * Check for conflicts.
2490 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2491 * HC: Simply resolve the conflict.
2492 */
2493 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2494 {
2495 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2496# ifndef IN_RING3
2497 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2498 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2499 return VERR_ADDRESS_CONFLICT;
2500# else
2501 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2502 Assert(pMapping);
2503# if PGM_GST_TYPE == PGM_TYPE_32BIT
2504 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2505# elif PGM_GST_TYPE == PGM_TYPE_PAE
2506 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2507# else
2508 AssertFailed(); /* can't happen for amd64 */
2509# endif
2510 if (RT_FAILURE(rc))
2511 {
2512 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2513 return rc;
2514 }
2515 PdeDst = *pPdeDst;
2516# endif
2517 }
2518# else /* PGM_WITHOUT_MAPPINGS */
2519 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2520# endif /* PGM_WITHOUT_MAPPINGS */
2521 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2522
2523# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2524 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2525 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2526# endif
2527
2528 /*
2529 * Sync page directory entry.
2530 */
2531 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2532 if (PdeSrc.n.u1Present)
2533 {
2534 /*
2535 * Allocate & map the page table.
2536 */
2537 PSHWPT pPTDst;
2538# if PGM_GST_TYPE == PGM_TYPE_AMD64
2539 const bool fPageTable = !PdeSrc.b.u1Size;
2540# else
2541 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2542# endif
2543 PPGMPOOLPAGE pShwPage;
2544 RTGCPHYS GCPhys;
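        /* The pool is keyed on the guest page table's physical address (or the big page base), so an
           already shadowed table may be returned as a cached page and simply hooked up below. */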
2545 if (fPageTable)
2546 {
2547 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2548# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2549 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2550 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2551# endif
2552# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2553 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2554# else
2555 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2556# endif
2557 }
2558 else
2559 {
2560 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2561# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2562 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2563 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2564# endif
2565# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2566 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2567# else
2568 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2569# endif
2570 }
2571 if (rc == VINF_SUCCESS)
2572 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2573 else if (rc == VINF_PGM_CACHED_PAGE)
2574 {
2575 /*
2576 * The PT was cached, just hook it up.
2577 */
2578 if (fPageTable)
2579 PdeDst.u = pShwPage->Core.Key
2580 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2581 else
2582 {
2583 PdeDst.u = pShwPage->Core.Key
2584 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2585 /* (see explanation and assumptions further down.) */
2586 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2587 {
2588 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2589 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2590 PdeDst.b.u1Write = 0;
2591 }
2592 }
2593 *pPdeDst = PdeDst;
2594# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2595 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2596# endif
2597 return VINF_SUCCESS;
2598 }
2599 else if (rc == VERR_PGM_POOL_FLUSHED)
2600 {
2601 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
2602# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2603 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2604# endif
2605 return VINF_PGM_SYNC_CR3;
2606 }
2607 else
2608 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2609 PdeDst.u &= X86_PDE_AVL_MASK;
2610 PdeDst.u |= pShwPage->Core.Key;
2611
2612 /*
2613 * Page directory has been accessed (this is a fault situation, remember).
2614 */
2615 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2616 if (fPageTable)
2617 {
2618 /*
2619 * Page table - 4KB.
2620 *
2621 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2622 */
2623 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2624 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2625 PGSTPT pPTSrc;
2626 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2627 if (RT_SUCCESS(rc))
2628 {
2629 /*
2630 * Start by syncing the page directory entry so CSAM's TLB trick works.
2631 */
2632 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2633 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2634 *pPdeDst = PdeDst;
2635# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2636 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2637# endif
2638
2639 /*
2640 * Directory/page user or supervisor privilege: (same goes for read/write)
2641 *
2642 * Directory Page Combined
2643 * U/S U/S U/S
2644 * 0 0 0
2645 * 0 1 0
2646 * 1 0 0
2647 * 1 1 1
2648 *
2649 * Simple AND operation. Table listed for completeness.
2650 *
2651 */
2652 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2653# ifdef PGM_SYNC_N_PAGES
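                    /* Only sync a window around the faulting entry; not-present entries outside it are filled in by later page faults. */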
2654 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2655 unsigned iPTDst = iPTBase;
2656 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2657 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2658 iPTDst = 0;
2659 else
2660 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2661# else /* !PGM_SYNC_N_PAGES */
2662 unsigned iPTDst = 0;
2663 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2664# endif /* !PGM_SYNC_N_PAGES */
2665# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2666 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2667 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2668# else
2669 const unsigned offPTSrc = 0;
2670# endif
2671 for (; iPTDst < iPTDstEnd; iPTDst++)
2672 {
2673 const unsigned iPTSrc = iPTDst + offPTSrc;
2674 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2675
2676 if (PteSrc.n.u1Present) /* we've already cleared it above */
2677 {
2678# ifndef IN_RING0
2679 /*
2680 * Assuming kernel code will be marked as supervisor - and not as user level
2681 * and executed using a conforming code selector - and marked as readonly.
2682 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2683 */
2684 PPGMPAGE pPage;
2685 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2686 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2687 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2688 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2689 )
2690# endif
2691 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2692 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2693 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2694 PteSrc.n.u1Present,
2695 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2696 PteSrc.n.u1User & PdeSrc.n.u1User,
2697 (uint64_t)PteSrc.u,
2698 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2699 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2700 }
2701 } /* for PTEs */
2702 }
2703 }
2704 else
2705 {
2706 /*
2707 * Big page - 2/4MB.
2708 *
2709 * We'll walk the ram range list in parallel and optimize lookups.
2710 * We will only sync one shadow page table at a time.
2711 */
2712 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2713
2714 /**
2715 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2716 */
2717
2718 /*
2719 * Start by syncing the page directory entry.
2720 */
2721 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2722 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2723
2724 /*
2725 * If the page is not flagged as dirty and is writable, then make it read-only
2726 * at PD level, so we can set the dirty bit when the page is modified.
2727 *
2728 * ASSUMES that page access handlers are implemented on page table entry level.
2729 * Thus we will first catch the dirty access and set PDE.D and restart. If
2730 * there is an access handler, we'll trap again and let it work on the problem.
2731 */
2732 /** @todo move the above stuff to a section in the PGM documentation. */
2733 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2734 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2735 {
2736 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2737 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2738 PdeDst.b.u1Write = 0;
2739 }
2740 *pPdeDst = PdeDst;
2741# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2742 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2743# endif
2744
2745 /*
2746 * Fill the shadow page table.
2747 */
2748 /* Get address and flags from the source PDE. */
2749 SHWPTE PteDstBase;
2750 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2751
2752 /* Loop thru the entries in the shadow PT. */
2753 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2754 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2755 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2756 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2757 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2758 unsigned iPTDst = 0;
2759 while (iPTDst < RT_ELEMENTS(pPTDst->a))
2760 {
2761 /* Advance ram range list. */
2762 while (pRam && GCPhys > pRam->GCPhysLast)
2763 pRam = pRam->CTX_SUFF(pNext);
2764 if (pRam && GCPhys >= pRam->GCPhys)
2765 {
2766 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2767 do
2768 {
2769 /* Make shadow PTE. */
2770 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2771 SHWPTE PteDst;
2772
2773# ifdef VBOX_WITH_NEW_PHYS_CODE
2774# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2775 /* Try to make the page writable if necessary. */
2776 if ( PteDstBase.n.u1Write
2777 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2778 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2779 {
2780 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2781 AssertRCReturn(rc, rc);
2782 }
2783# endif
2784# else /* !VBOX_WITH_NEW_PHYS_CODE */
2785 /* Make sure the RAM has already been allocated. */
2786 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2787 {
2788 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2789 {
2790# ifdef IN_RING3
2791 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2792# else
2793 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2794# endif
2795 if (rc != VINF_SUCCESS)
2796 return rc;
2797 }
2798 }
2799# endif /* !VBOX_WITH_NEW_PHYS_CODE */
2800
2801 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2802 {
2803 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2804 {
2805 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2806 PteDst.n.u1Write = 0;
2807 }
2808 else
2809 PteDst.u = 0;
2810 }
2811# ifndef IN_RING0
2812 /*
2813 * Assuming kernel code will be marked as supervisor and not as user level and executed
2814 * using a conforming code selector. Don't check for readonly, as that implies the whole
2815 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2816 */
2817 else if ( !PdeSrc.n.u1User
2818 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2819 PteDst.u = 0;
2820# endif
2821 else
2822 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2823
2824# ifdef VBOX_WITH_NEW_PHYS_CODE
2825 /* Only map writable pages writable. */
2826 if ( PteDst.n.u1Write
2827 && PteDst.n.u1Present
2828 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2829 {
2830 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2831 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2832 }
2833# endif
2834
2835# ifdef PGMPOOL_WITH_USER_TRACKING
2836 if (PteDst.n.u1Present)
2837 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2838# endif
2839 /* commit it */
2840 pPTDst->a[iPTDst] = PteDst;
2841 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2842 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2843 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2844
2845 /* advance */
2846 GCPhys += PAGE_SIZE;
2847 iHCPage++;
2848 iPTDst++;
2849 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2850 && GCPhys <= pRam->GCPhysLast);
2851 }
2852 else if (pRam)
2853 {
2854 Log(("Invalid pages at %RGp\n", GCPhys));
2855 do
2856 {
2857 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2858 GCPhys += PAGE_SIZE;
2859 iPTDst++;
2860 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2861 && GCPhys < pRam->GCPhys);
2862 }
2863 else
2864 {
2865 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2866 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2867 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2868 }
2869 } /* while more PTEs */
2870 } /* 4KB / 4MB */
2871 }
2872 else
2873 AssertRelease(!PdeDst.n.u1Present);
2874
2875 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2876 if (RT_FAILURE(rc))
2877 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2878 return rc;
2879
2880#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2881 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2882 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2883
2884
2885 /*
2886 * Validate input a little bit.
2887 */
2888 int rc = VINF_SUCCESS;
2889# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2890 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2891 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2892
2893# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2894 /* Fetch the pgm pool shadow descriptor. */
2895 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2896 Assert(pShwPde);
2897# endif
2898
2899# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2900# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2901 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2902 PPGMPOOLPAGE pShwPde;
2903 PX86PDPAE pPDDst;
2904 PSHWPDE pPdeDst;
2905
2906 /* Fetch the pgm pool shadow descriptor. */
2907 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2908 AssertRCSuccessReturn(rc, rc);
2909 Assert(pShwPde);
2910
2911 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2912 pPdeDst = &pPDDst->a[iPDDst];
2913# else
2914 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm!*/;
2915 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
2916# endif
2917
2918# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2919 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2920 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2921 PX86PDPAE pPDDst;
2922 PX86PDPT pPdptDst;
2923 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2924 AssertRCSuccessReturn(rc, rc);
2925 Assert(pPDDst);
2926 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2927
2928 /* Fetch the pgm pool shadow descriptor. */
2929 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2930 Assert(pShwPde);
2931
2932# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2933 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2934 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2935 PEPTPD pPDDst;
2936 PEPTPDPT pPdptDst;
2937
2938 rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2939 if (rc != VINF_SUCCESS)
2940 {
2941 AssertRC(rc);
2942 return rc;
2943 }
2944 Assert(pPDDst);
2945 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2946
2947 /* Fetch the pgm pool shadow descriptor. */
2948 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2949 Assert(pShwPde);
2950# endif
2951 SHWPDE PdeDst = *pPdeDst;
2952
2953 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2954 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2955
2956 GSTPDE PdeSrc;
2957 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2958 PdeSrc.n.u1Present = 1;
2959 PdeSrc.n.u1Write = 1;
2960 PdeSrc.n.u1Accessed = 1;
2961 PdeSrc.n.u1User = 1;
2962
2963 /*
2964 * Allocate & map the page table.
2965 */
2966 PSHWPT pPTDst;
2967 PPGMPOOLPAGE pShwPage;
2968 RTGCPHYS GCPhys;
2969
2970 /* Virtual address = physical address */
2971 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2972# if PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_EPT || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2973 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2974# else
2975 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2976# endif
2977
2978 if ( rc == VINF_SUCCESS
2979 || rc == VINF_PGM_CACHED_PAGE)
2980 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2981 else
2982 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2983
2984 PdeDst.u &= X86_PDE_AVL_MASK;
2985 PdeDst.u |= pShwPage->Core.Key;
2986 PdeDst.n.u1Present = 1;
2987 PdeDst.n.u1Write = 1;
2988# if PGM_SHW_TYPE == PGM_TYPE_EPT
2989 PdeDst.n.u1Execute = 1;
2990# else
2991 PdeDst.n.u1User = 1;
2992 PdeDst.n.u1Accessed = 1;
2993# endif
2994 *pPdeDst = PdeDst;
2995
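    /* Let SyncPage populate the first PGM_SYNC_NR_PAGES identity-mapped entries; the remainder are faulted in on demand. */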
2996 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2997 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2998 return rc;
2999
3000#else
3001 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3002 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3003 return VERR_INTERNAL_ERROR;
3004#endif
3005}
3006
3007
3008
3009/**
3010 * Prefetch a page/set of pages.
3011 *
3012 * Typically used to sync commonly used pages before entering raw mode
3013 * after a CR3 reload.
3014 *
3015 * @returns VBox status code.
3016 * @param pVM VM handle.
3017 * @param GCPtrPage Page to prefetch.
3018 */
3019PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage)
3020{
3021#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3022 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3023 /*
3024 * Check that all Guest levels thru the PDE are present, getting the
3025 * PD and PDE in the process.
3026 */
3027 int rc = VINF_SUCCESS;
3028# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3029# if PGM_GST_TYPE == PGM_TYPE_32BIT
3030 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3031 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3032# elif PGM_GST_TYPE == PGM_TYPE_PAE
3033 unsigned iPDSrc;
3034# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3035 X86PDPE PdpeSrc;
3036 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3037# else
3038 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, NULL);
3039# endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
3040 if (!pPDSrc)
3041 return VINF_SUCCESS; /* not present */
3042# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3043 unsigned iPDSrc;
3044 PX86PML4E pPml4eSrc;
3045 X86PDPE PdpeSrc;
3046 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3047 if (!pPDSrc)
3048 return VINF_SUCCESS; /* not present */
3049# endif
3050 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3051# else
3052 PGSTPD pPDSrc = NULL;
3053 const unsigned iPDSrc = 0;
3054 GSTPDE PdeSrc;
3055
3056 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3057 PdeSrc.n.u1Present = 1;
3058 PdeSrc.n.u1Write = 1;
3059 PdeSrc.n.u1Accessed = 1;
3060 PdeSrc.n.u1User = 1;
3061# endif
3062
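 /* Only prefetch when the guest PDE is both present and accessed; otherwise leave
    it to the first real access to sync the page. */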
3063 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3064 {
3065# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3066 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
3067# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3068# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3069 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3070 PX86PDPAE pPDDst;
3071 X86PDEPAE PdeDst;
3072# if PGM_GST_TYPE != PGM_TYPE_PAE
3073 X86PDPE PdpeSrc;
3074
3075 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3076 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3077# endif
3078 int rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3079 if (rc != VINF_SUCCESS)
3080 {
3081 AssertRC(rc);
3082 return rc;
3083 }
3084 Assert(pPDDst);
3085 PdeDst = pPDDst->a[iPDDst];
3086# else
3087 const X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
3088# endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
3089
3090# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3091 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3092 PX86PDPAE pPDDst;
3093 X86PDEPAE PdeDst;
3094
3095# if PGM_GST_TYPE == PGM_TYPE_PROT
3096 /* AMD-V nested paging */
3097 X86PML4E Pml4eSrc;
3098 X86PDPE PdpeSrc;
3099 PX86PML4E pPml4eSrc = &Pml4eSrc;
3100
3101 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3102 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3103 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3104# endif
3105
3106 int rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3107 if (rc != VINF_SUCCESS)
3108 {
3109 AssertRC(rc);
3110 return rc;
3111 }
3112 Assert(pPDDst);
3113 PdeDst = pPDDst->a[iPDDst];
3114# endif
3115 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3116 {
3117 if (!PdeDst.n.u1Present)
3118 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3119 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3120 else
3121 {
3122 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3123 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3124 * makes no sense to prefetch more than one page.
3125 */
3126 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3127 if (RT_SUCCESS(rc))
3128 rc = VINF_SUCCESS;
3129 }
3130 }
3131 }
3132 return rc;
3133
3134#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3135 return VINF_SUCCESS; /* ignore */
3136#endif
3137}
3138
3139
3140
3141
3142/**
3143 * Syncs a page during a PGMVerifyAccess() call.
3144 *
3145 * @returns VBox status code (informational included).
3146 * @param GCPtrPage The address of the page to sync.
3147 * @param fPage The effective guest page flags.
3148 * @param uErr The trap error code.
3149 */
3150PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3151{
3152 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3153
3154 Assert(!HWACCMIsNestedPagingActive(pVM));
3155 #if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3156 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3157
3158# ifndef IN_RING0
3159 if (!(fPage & X86_PTE_US))
3160 {
3161 /*
3162 * Mark this page as safe.
3163 */
3164 /** @todo not correct for pages that contain both code and data!! */
3165 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3166 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3167 }
3168# endif
3169
3170 /*
3171 * Get guest PD and index.
3172 */
3173# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3174# if PGM_GST_TYPE == PGM_TYPE_32BIT
3175 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3176 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3177# elif PGM_GST_TYPE == PGM_TYPE_PAE
3178 unsigned iPDSrc;
3179# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3180 X86PDPE PdpeSrc;
3181 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3182# else
3183 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, NULL);
3184# endif
3185
3186 if (!pPDSrc)
3187 {
3188 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3189 return VINF_EM_RAW_GUEST_TRAP;
3190 }
3191# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3192 unsigned iPDSrc;
3193 PX86PML4E pPml4eSrc;
3194 X86PDPE PdpeSrc;
3195 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3196 if (!pPDSrc)
3197 {
3198 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3199 return VINF_EM_RAW_GUEST_TRAP;
3200 }
3201# endif
3202# else
3203 PGSTPD pPDSrc = NULL;
3204 const unsigned iPDSrc = 0;
3205# endif
3206 int rc = VINF_SUCCESS;
3207
3208 /*
3209 * First check if the shadow pd is present.
3210 */
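 /* Resolve the shadow PDE for the active shadow paging mode; if it turns out not
    to be present it is created via SyncPT below. */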
3211# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3212 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
3213# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3214 PX86PDEPAE pPdeDst;
3215# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3216 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3217 PX86PDPAE pPDDst;
3218# if PGM_GST_TYPE != PGM_TYPE_PAE
3219 X86PDPE PdpeSrc;
3220
3221 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3222 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3223# endif
3224 rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3225 if (rc != VINF_SUCCESS)
3226 {
3227 AssertRC(rc);
3228 return rc;
3229 }
3230 Assert(pPDDst);
3231 pPdeDst = &pPDDst->a[iPDDst];
3232# else
3233 pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
3234# endif
3235# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3236 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3237 PX86PDPAE pPDDst;
3238 PX86PDEPAE pPdeDst;
3239
3240# if PGM_GST_TYPE == PGM_TYPE_PROT
3241 /* AMD-V nested paging */
3242 X86PML4E Pml4eSrc;
3243 X86PDPE PdpeSrc;
3244 PX86PML4E pPml4eSrc = &Pml4eSrc;
3245
3246 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3247 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3248 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3249# endif
3250
3251 rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3252 if (rc != VINF_SUCCESS)
3253 {
3254 AssertRC(rc);
3255 return rc;
3256 }
3257 Assert(pPDDst);
3258 pPdeDst = &pPDDst->a[iPDDst];
3259# endif
3260 if (!pPdeDst->n.u1Present)
3261 {
3262 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3263 AssertRC(rc);
3264 if (rc != VINF_SUCCESS)
3265 return rc;
3266 }
3267
3268# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3269 /* Check for dirty bit fault */
3270 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3271 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3272 Log(("PGMVerifyAccess: success (dirty)\n"));
3273 else
3274 {
3275 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3276#else
3277 {
3278 GSTPDE PdeSrc;
3279 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3280 PdeSrc.n.u1Present = 1;
3281 PdeSrc.n.u1Write = 1;
3282 PdeSrc.n.u1Accessed = 1;
3283 PdeSrc.n.u1User = 1;
3284
3285#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3286 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3287 if (uErr & X86_TRAP_PF_US)
3288 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3289 else /* supervisor */
3290 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3291
3292 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3293 if (RT_SUCCESS(rc))
3294 {
3295 /* Page was successfully synced */
3296 Log2(("PGMVerifyAccess: success (sync)\n"));
3297 rc = VINF_SUCCESS;
3298 }
3299 else
3300 {
3301 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3302 return VINF_EM_RAW_GUEST_TRAP;
3303 }
3304 }
3305 return rc;
3306
3307#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3308
3309 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3310 return VERR_INTERNAL_ERROR;
3311#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3312}
3313
3314
3315#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3316# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3317/**
3318 * Figures out which kind of shadow page this guest PDE warrants.
3319 *
3320 * @returns Shadow page kind.
3321 * @param pPdeSrc The guest PDE in question.
3322 * @param cr4 The current guest cr4 value.
3323 */
3324DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3325{
3326 # if PGM_GST_TYPE == PGM_TYPE_AMD64
3327 if (!pPdeSrc->n.u1Size)
3328# else
3329 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3330# endif
3331 return BTH_PGMPOOLKIND_PT_FOR_PT;
3332 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3333 //{
3334 // case 0:
3335 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3336 // case X86_PDE4M_RW:
3337 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3338 // case X86_PDE4M_US:
3339 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3340 // case X86_PDE4M_RW | X86_PDE4M_US:
3341 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3342# if 0
3343 // case X86_PDE4M_PAE_NX:
3344 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3345 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3346 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3347 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3348 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3349 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3350 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3351# endif
3352 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3353 //}
3354}
3355# endif
3356#endif
3357
3358#undef MY_STAM_COUNTER_INC
3359#define MY_STAM_COUNTER_INC(a) do { } while (0)
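/* MY_STAM_COUNTER_INC is a no-op here, so the statistics referenced in SyncCR3 below are compiled out. */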
3360
3361
3362/**
3363 * Syncs the paging hierarchy starting at CR3.
3364 *
3365 * @returns VBox status code, no specials.
3366 * @param pVM The virtual machine.
3367 * @param cr0 Guest context CR0 register
3368 * @param cr3 Guest context CR3 register
3369 * @param cr4 Guest context CR4 register
3370 * @param fGlobal Including global page directories or not
3371 */
3372PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3373{
3374 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3375 fGlobal = true; /* Change this CR3 reload to be a global one. */
3376
3377 LogFlow(("SyncCR3 %d\n", fGlobal));
3378
3379#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3380 /*
3381 * Update page access handlers.
3382 * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3383 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3384 * have to look into that later because it will have a bad influence on performance.
3385 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3386 * bird: Yes, but that won't work for aliases.
3387 */
3388 /** @todo this MUST go away. See #1557. */
3389 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3390 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3391 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3392#endif
3393
3394#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3395 /*
3396 * Nested / EPT - almost no work.
3397 */
3398 /** @todo check if this is really necessary; the call does it as well... */
3399 HWACCMFlushTLB(pVM);
3400 return VINF_SUCCESS;
3401
3402#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3403 /*
3404 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3405 * out the shadow parts when the guest modifies its tables.
3406 */
3407 return VINF_SUCCESS;
3408
3409#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3410
3411# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3412# ifdef PGM_WITHOUT_MAPPINGS
3413 Assert(pVM->pgm.s.fMappingsFixed);
3414 return VINF_SUCCESS;
3415# else
3416 /* Nothing to do when mappings are fixed. */
3417 if (pVM->pgm.s.fMappingsFixed)
3418 return VINF_SUCCESS;
3419
3420 int rc = PGMMapResolveConflicts(pVM);
3421 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3422 if (rc == VINF_PGM_SYNC_CR3)
3423 {
3424 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3425 return VINF_PGM_SYNC_CR3;
3426 }
3427# endif
3428 return VINF_SUCCESS;
3429# else
3430 /*
3431 * PAE and 32-bit legacy mode (shadow).
3432 * (Guest PAE, 32-bit legacy, protected and real modes.)
3433 */
3434 Assert(fGlobal || (cr4 & X86_CR4_PGE));
3435 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Global) : &pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3NotGlobal));
3436
3437# if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
3438 bool const fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3439
3440 /*
3441 * Get page directory addresses.
3442 */
3443# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3444 PX86PDE pPDEDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, 0);
3445# else /* PGM_SHW_TYPE == PGM_TYPE_PAE */
3446# if PGM_GST_TYPE == PGM_TYPE_32BIT
3447 PX86PDEPAE pPDEDst = NULL;
3448# endif
3449# endif
3450
3451# if PGM_GST_TYPE == PGM_TYPE_32BIT
3452 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3453 Assert(pPDSrc);
3454# if !defined(IN_RC) && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
3455 Assert(PGMPhysGCPhys2R3PtrAssert(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == (RTR3PTR)pPDSrc);
3456# endif
3457# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3458
3459 /*
3460 * Iterate the CR3 page.
3461 */
3462 PPGMMAPPING pMapping;
3463 unsigned iPdNoMapping;
3464 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
3465 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3466
3467 /* Only check mappings if they are supposed to be put into the shadow page table. */
3468 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
3469 {
3470 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3471 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
3472 }
3473 else
3474 {
3475 pMapping = 0;
3476 iPdNoMapping = ~0U;
3477 }
3478
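 /* PAE guest: walk all 4 PDPT entries, each covering a 512-entry page directory;
    32-bit guest: a single 1024-entry page directory. */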
3479# if PGM_GST_TYPE == PGM_TYPE_PAE
3480 for (uint64_t iPdpt = 0; iPdpt < GST_PDPE_ENTRIES; iPdpt++)
3481 {
3482 unsigned iPDSrc;
3483 X86PDPE PdpeSrc;
3484 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPdpt << X86_PDPT_SHIFT, &iPDSrc, &PdpeSrc);
3485 PX86PDEPAE pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, iPdpt << X86_PDPT_SHIFT);
3486 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3487
3488 if (pPDSrc == NULL)
3489 {
3490 /* PDPE not present */
3491 if (pPdptDst->a[iPdpt].n.u1Present)
3492 {
3493 LogFlow(("SyncCR3: guest PDPE %lld not present; clear shw pdpe\n", iPdpt));
3494 /* for each page directory entry */
3495 for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++)
3496 {
3497 if ( pPDEDst[iPD].n.u1Present
3498 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
3499 {
3500 pgmPoolFree(pVM, pPDEDst[iPD].u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdpt * X86_PG_PAE_ENTRIES + iPD);
3501 pPDEDst[iPD].u = 0;
3502 }
3503 }
3504 }
3505 if (!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING))
3506 pPdptDst->a[iPdpt].n.u1Present = 0;
3507 continue;
3508 }
3509# else /* PGM_GST_TYPE != PGM_TYPE_PAE */
3510 {
3511# endif /* PGM_GST_TYPE != PGM_TYPE_PAE */
3512 for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++)
3513 {
3514# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3515 if ((iPD & 255) == 0) /* Start of new PD. */
3516 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)iPD << GST_PD_SHIFT);
3517# endif
3518# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3519 Assert(pgmShwGet32BitPDEPtr(&pVM->pgm.s, (uint32_t)iPD << SHW_PD_SHIFT) == pPDEDst);
3520# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3521 # if defined(VBOX_STRICT) && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) /* Unfortunately not reliable with PGMR0DynMap and multiple VMs. */
3522 RTGCPTR GCPtrStrict = (uint32_t)iPD << GST_PD_SHIFT;
3523# if PGM_GST_TYPE == PGM_TYPE_PAE
3524 GCPtrStrict |= iPdpt << X86_PDPT_SHIFT;
3525# endif
3526 AssertMsg(pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrStrict) == pPDEDst, ("%p vs %p (%RGv)\n", pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrStrict), pPDEDst, GCPtrStrict));
3527# endif /* VBOX_STRICT */
3528# endif
3529 GSTPDE PdeSrc = pPDSrc->a[iPD];
3530 if ( PdeSrc.n.u1Present
3531 && (PdeSrc.n.u1User || fRawR0Enabled))
3532 {
3533# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3534 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3535 && !defined(PGM_WITHOUT_MAPPINGS)
3536
3537 /*
3538 * Check for conflicts with GC mappings.
3539 */
3540# if PGM_GST_TYPE == PGM_TYPE_PAE
3541 if (iPD + iPdpt * X86_PG_PAE_ENTRIES == iPdNoMapping)
3542# else
3543 if (iPD == iPdNoMapping)
3544# endif
3545 {
3546 if (pVM->pgm.s.fMappingsFixed)
3547 {
3548 /* It's fixed, just skip the mapping. */
3549 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3550 Assert(PGM_GST_TYPE == PGM_TYPE_32BIT || (iPD + cPTs - 1) / X86_PG_PAE_ENTRIES == iPD / X86_PG_PAE_ENTRIES);
3551 iPD += cPTs - 1;
3552# if PGM_SHW_TYPE != PGM_GST_TYPE /* SHW==PAE && GST==32BIT */
3553 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)(iPD + 1) << GST_PD_SHIFT);
3554# else
3555 pPDEDst += cPTs;
3556# endif
3557 pMapping = pMapping->CTX_SUFF(pNext);
3558 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3559 continue;
3560 }
3561# ifdef IN_RING3
3562# if PGM_GST_TYPE == PGM_TYPE_32BIT
3563 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3564# elif PGM_GST_TYPE == PGM_TYPE_PAE
3565 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpt << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3566# endif
3567 if (RT_FAILURE(rc))
3568 return rc;
3569
3570 /*
3571 * Update iPdNoMapping and pMapping.
3572 */
3573 pMapping = pVM->pgm.s.pMappingsR3;
3574 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3575 pMapping = pMapping->pNextR3;
3576 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3577# else /* !IN_RING3 */
3578 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3579 return VINF_PGM_SYNC_CR3;
3580# endif /* !IN_RING3 */
3581 }
3582# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3583 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3584# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3585
3586 /*
3587 * Sync page directory entry.
3588 *
3589 * The current approach is to allocate the page table but to set
3590 * the entry to not-present and postpone the page table synching till
3591 * it's actually used.
3592 */
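 /* A present shadow PDE is kept only if it still matches the guest PDE (same
    physical address, page kind and relevant attributes); otherwise it is freed
    and left not-present so a later page fault re-syncs it. */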
3593# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3594 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3595# elif PGM_GST_TYPE == PGM_TYPE_PAE
3596 const unsigned iPdShw = iPD + iPdpt * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3597# else
3598 const unsigned iPdShw = iPD; NOREF(iPdShw);
3599# endif
3600 {
3601 SHWPDE PdeDst = *pPDEDst;
3602 if (PdeDst.n.u1Present)
3603 {
3604 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
3605 RTGCPHYS GCPhys;
3606 if ( !PdeSrc.b.u1Size
3607 || !fBigPagesSupported)
3608 {
3609 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
3610# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3611 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3612 GCPhys |= i * (PAGE_SIZE / 2);
3613# endif
3614 }
3615 else
3616 {
3617 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3618# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3619 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
3620 GCPhys |= i * X86_PAGE_2M_SIZE;
3621# endif
3622 }
3623
3624 if ( pShwPage->GCPhys == GCPhys
3625 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
3626 && ( pShwPage->fCached
3627 || ( !fGlobal
3628 && ( false
3629# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
3630 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3631 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
3632 || ( !pShwPage->fSeenNonGlobal
3633 && (cr4 & X86_CR4_PGE))
3634# endif
3635 )
3636 )
3637 )
3638 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
3639 || ( fBigPagesSupported
3640 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
3641 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
3642 )
3643 )
3644 {
3645# ifdef VBOX_WITH_STATISTICS
3646 if ( !fGlobal
3647 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3648 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
3649 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstSkippedGlobalPD));
3650 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
3651 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstSkippedGlobalPT));
3652 else
3653 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstCacheHit));
3654# endif /* VBOX_WITH_STATISTICS */
3655 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
3656 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
3657 //# ifdef PGMPOOL_WITH_CACHE
3658 // pgmPoolCacheUsed(pPool, pShwPage);
3659 //# endif
3660 }
3661 else
3662 {
3663 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3664 pPDEDst->u = 0;
3665 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstFreed));
3666 }
3667 }
3668 else
3669 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstNotPresent));
3670
3671 /* advance */
3672 pPDEDst++;
3673 } /* foreach 2MB PAE PDE in 4MB guest PDE */
3674 }
3675# if PGM_GST_TYPE == PGM_TYPE_PAE
3676 else if (iPD + iPdpt * X86_PG_PAE_ENTRIES != iPdNoMapping)
3677# else
3678 else if (iPD != iPdNoMapping)
3679# endif
3680 {
3681 /*
3682 * Check if there is any page directory to mark not present here.
3683 */
3684# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3685 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3686# elif PGM_GST_TYPE == PGM_TYPE_PAE
3687 const unsigned iPdShw = iPD + iPdpt * X86_PG_PAE_ENTRIES;
3688# else
3689 const unsigned iPdShw = iPD;
3690# endif
3691 {
3692 if (pPDEDst->n.u1Present)
3693 {
3694 pgmPoolFree(pVM, pPDEDst->u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdShw);
3695 pPDEDst->u = 0;
3696 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstFreedSrcNP));
3697 }
3698 pPDEDst++;
3699 }
3700 }
3701 else
3702 {
3703# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3704 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3705 && !defined(PGM_WITHOUT_MAPPINGS)
3706
3707 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3708
3709 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3710 if (pVM->pgm.s.fMappingsFixed)
3711 {
3712 /* It's fixed, just skip the mapping. */
3713 pMapping = pMapping->CTX_SUFF(pNext);
3714 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3715 }
3716 else
3717 {
3718 /*
3719 * Check for conflicts for subsequent pagetables
3720 * and advance to the next mapping.
3721 */
3722 iPdNoMapping = ~0U;
3723 unsigned iPT = cPTs;
3724 while (iPT-- > 1)
3725 {
3726 if ( pPDSrc->a[iPD + iPT].n.u1Present
3727 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3728 {
3729# ifdef IN_RING3
3730# if PGM_GST_TYPE == PGM_TYPE_32BIT
3731 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3732# elif PGM_GST_TYPE == PGM_TYPE_PAE
3733 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpt << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3734# endif
3735 if (RT_FAILURE(rc))
3736 return rc;
3737
3738 /*
3739 * Update iPdNoMapping and pMapping.
3740 */
3741 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3742 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3743 pMapping = pMapping->CTX_SUFF(pNext);
3744 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3745 break;
3746# else
3747 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3748 return VINF_PGM_SYNC_CR3;
3749# endif
3750 }
3751 }
3752 if (iPdNoMapping == ~0U && pMapping)
3753 {
3754 pMapping = pMapping->CTX_SUFF(pNext);
3755 if (pMapping)
3756 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3757 }
3758 }
3759
3760 /* advance. */
3761 Assert(PGM_GST_TYPE == PGM_TYPE_32BIT || (iPD + cPTs - 1) / X86_PG_PAE_ENTRIES == iPD / X86_PG_PAE_ENTRIES);
3762 iPD += cPTs - 1;
3763# if PGM_SHW_TYPE != PGM_GST_TYPE /* SHW==PAE && GST==32BIT */
3764 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)(iPD + 1) << GST_PD_SHIFT);
3765# else
3766 pPDEDst += cPTs;
3767# endif
3768# if PGM_GST_TYPE != PGM_SHW_TYPE
3769 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3770# endif
3771# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3772 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3773# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3774 }
3775
3776 } /* for iPD */
3777 } /* for each PDPTE (PAE) */
3778 return VINF_SUCCESS;
3779
3780# else /* guest real and protected mode */
3781 return VINF_SUCCESS;
3782# endif
3783#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
3784#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3785}
3786
3787
3788
3789
3790#ifdef VBOX_STRICT
3791#ifdef IN_RC
3792# undef AssertMsgFailed
3793# define AssertMsgFailed Log
3794#endif
3795#ifdef IN_RING3
3796# include <VBox/dbgf.h>
3797
3798/**
3799 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3800 *
3801 * @returns VBox status code (VINF_SUCCESS).
3802 * @param pVM The VM handle.
3803 * @param cr3 The root of the hierarchy.
3804 * @param cr4 The CR4 register; only PAE and PSE are currently used.
3805 * @param fLongMode Set if long mode, false if not long mode.
3806 * @param cMaxDepth Number of levels to dump.
3807 * @param pHlp Pointer to the output functions.
3808 */
3809__BEGIN_DECLS
3810VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3811__END_DECLS
3812
3813#endif
3814
3815/**
3816 * Checks that the shadow page table is in sync with the guest one.
3817 *
3818 * @returns The number of errors.
3819 * @param pVM The virtual machine.
3820 * @param cr3 Guest context CR3 register
3821 * @param cr4 Guest context CR4 register
3822 * @param GCPtr Where to start. Defaults to 0.
3823 * @param cb How much to check. Defaults to everything.
3824 */
3825PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3826{
3827#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3828 return 0;
3829#else
3830 unsigned cErrors = 0;
3831
3832#if PGM_GST_TYPE == PGM_TYPE_PAE
3833 /** @todo currently broken; crashes below somewhere */
3834 AssertFailed();
3835#endif
3836
3837#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3838 || PGM_GST_TYPE == PGM_TYPE_PAE \
3839 || PGM_GST_TYPE == PGM_TYPE_AMD64
3840
3841# if PGM_GST_TYPE == PGM_TYPE_AMD64
3842 bool fBigPagesSupported = true;
3843# else
3844 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3845# endif
3846 PPGM pPGM = &pVM->pgm.s;
3847 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3848 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3849# ifndef IN_RING0
3850 RTHCPHYS HCPhys; /* general usage. */
3851# endif
3852 int rc;
3853
3854 /*
3855 * Check that the Guest CR3 and all its mappings are correct.
3856 */
3857 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3858 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3859 false);
3860# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3861# if PGM_GST_TYPE == PGM_TYPE_32BIT
3862 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3863# else
3864 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3865# endif
3866 AssertRCReturn(rc, 1);
3867 HCPhys = NIL_RTHCPHYS;
3868 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3869 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3870# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3871 RTGCPHYS GCPhys;
3872 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3873 AssertRCReturn(rc, 1);
3874 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3875# endif
3876# endif /* !IN_RING0 */
3877
3878 /*
3879 * Get and check the Shadow CR3.
3880 */
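 /* Determine how many shadow PDEs to walk and how much address space each one
    covers; this depends on the shadow paging mode and on whether a sub-range
    (GCPtr/cb) was requested. */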
3881# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3882 unsigned cPDEs = X86_PG_ENTRIES;
3883 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3884# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3885# if PGM_GST_TYPE == PGM_TYPE_32BIT
3886 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3887# else
3888 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3889# endif
3890 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3891# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3892 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3893 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3894# endif
3895 if (cb != ~(RTGCPTR)0)
3896 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3897
3898/** @todo call the other two PGMAssert*() functions. */
3899
3900# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3902# endif
3903
3904# if PGM_GST_TYPE == PGM_TYPE_AMD64
3905 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3906
3907 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3908 {
3909 PPGMPOOLPAGE pShwPdpt = NULL;
3910 PX86PML4E pPml4eSrc;
3911 PX86PML4E pPml4eDst;
3912 RTGCPHYS GCPhysPdptSrc;
3913
3914 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3915 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3916
3917 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3918 if (!pPml4eDst->n.u1Present)
3919 {
3920 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3921 continue;
3922 }
3923
3924 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3925 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3926
3927 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3928 {
3929 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3930 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3931 cErrors++;
3932 continue;
3933 }
3934
3935 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3936 {
3937 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3938 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3939 cErrors++;
3940 continue;
3941 }
3942
3943 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3944 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3945 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3946 {
3947 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3948 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3949 cErrors++;
3950 continue;
3951 }
3952# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3953 {
3954# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3955
3956# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3957 /*
3958 * Check the PDPTEs too.
3959 */
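 /* The shadow PDPTE must mirror the guest one: present bit and physical address,
    plus user/write/no-execute in long mode. */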
3960 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3961
3962 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3963 {
3964 unsigned iPDSrc;
3965 PPGMPOOLPAGE pShwPde = NULL;
3966 PX86PDPE pPdpeDst;
3967 RTGCPHYS GCPhysPdeSrc;
3968# if PGM_GST_TYPE == PGM_TYPE_PAE
3969 X86PDPE PdpeSrc;
3970 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3971 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3972# else
3973 PX86PML4E pPml4eSrc;
3974 X86PDPE PdpeSrc;
3975 PX86PDPT pPdptDst;
3976 PX86PDPAE pPDDst;
3977 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3978
3979 rc = pgmShwGetLongModePDPtr(pVM, GCPtr, NULL, &pPdptDst, &pPDDst);
3980 if (rc != VINF_SUCCESS)
3981 {
3982 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3983 GCPtr += 512 * _2M;
3984 continue; /* next PDPTE */
3985 }
3986 Assert(pPDDst);
3987# endif
3988 Assert(iPDSrc == 0);
3989
3990 pPdpeDst = &pPdptDst->a[iPdpt];
3991
3992 if (!pPdpeDst->n.u1Present)
3993 {
3994 GCPtr += 512 * _2M;
3995 continue; /* next PDPTE */
3996 }
3997
3998 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3999 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
4000
4001 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
4002 {
4003 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4004 GCPtr += 512 * _2M;
4005 cErrors++;
4006 continue;
4007 }
4008
4009 if (GCPhysPdeSrc != pShwPde->GCPhys)
4010 {
4011# if PGM_GST_TYPE == PGM_TYPE_AMD64
4012 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4013# else
4014 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4015# endif
4016 GCPtr += 512 * _2M;
4017 cErrors++;
4018 continue;
4019 }
4020
4021# if PGM_GST_TYPE == PGM_TYPE_AMD64
4022 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
4023 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
4024 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
4025 {
4026 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4027 GCPtr += 512 * _2M;
4028 cErrors++;
4029 continue;
4030 }
4031# endif
4032
4033# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4034 {
4035# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4036# if PGM_GST_TYPE == PGM_TYPE_32BIT
4037 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
4038# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4039 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
4040# endif
4041# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
4042 /*
4043 * Iterate the shadow page directory.
4044 */
4045 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
4046 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
4047
4048 for (;
4049 iPDDst < cPDEs;
4050 iPDDst++, GCPtr += cIncrement)
4051 {
4052# if PGM_SHW_TYPE == PGM_TYPE_PAE
4053 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
4054# else
4055 const SHWPDE PdeDst = pPDDst->a[iPDDst];
4056# endif
4057 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
4058 {
4059 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
4060 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
4061 {
4062 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
4063 cErrors++;
4064 continue;
4065 }
4066 }
4067 else if ( (PdeDst.u & X86_PDE_P)
4068 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
4069 )
4070 {
4071 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4072 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
4073 if (!pPoolPage)
4074 {
4075 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4076 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4077 cErrors++;
4078 continue;
4079 }
4080 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
4081
4082 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4083 {
4084 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4085 GCPtr, (uint64_t)PdeDst.u));
4086 cErrors++;
4087 }
4088
4089 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4090 {
4091 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4092 GCPtr, (uint64_t)PdeDst.u));
4093 cErrors++;
4094 }
4095
4096 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4097 if (!PdeSrc.n.u1Present)
4098 {
4099 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4100 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4101 cErrors++;
4102 continue;
4103 }
4104
4105 if ( !PdeSrc.b.u1Size
4106 || !fBigPagesSupported)
4107 {
4108 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4109# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4110 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4111# endif
4112 }
4113 else
4114 {
4115# if PGM_GST_TYPE == PGM_TYPE_32BIT
4116 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4117 {
4118 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4119 GCPtr, (uint64_t)PdeSrc.u));
4120 cErrors++;
4121 continue;
4122 }
4123# endif
4124 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
4125# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4126 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4127# endif
4128 }
4129
4130 if ( pPoolPage->enmKind
4131 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4132 {
4133 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4134 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4135 cErrors++;
4136 }
4137
4138 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4139 if (!pPhysPage)
4140 {
4141 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4142 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4143 cErrors++;
4144 continue;
4145 }
4146
4147 if (GCPhysGst != pPoolPage->GCPhys)
4148 {
4149 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4150 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4151 cErrors++;
4152 continue;
4153 }
4154
4155 if ( !PdeSrc.b.u1Size
4156 || !fBigPagesSupported)
4157 {
4158 /*
4159 * Page Table.
4160 */
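 /* 4KB case: map the guest page table and check every present shadow PTE against
    its guest counterpart (physical address and flags, modulo fIgnoreFlags). */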
4161 const GSTPT *pPTSrc;
4162 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4163 if (RT_FAILURE(rc))
4164 {
4165 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4166 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4167 cErrors++;
4168 continue;
4169 }
4170 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4171 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4172 {
4173 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4174 // (This problem will go away when/if we shadow multiple CR3s.)
4175 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4176 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4177 cErrors++;
4178 continue;
4179 }
4180 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4181 {
4182 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4183 GCPtr, (uint64_t)PdeDst.u));
4184 cErrors++;
4185 continue;
4186 }
4187
4188 /* iterate the page table. */
4189# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4190 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4191 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4192# else
4193 const unsigned offPTSrc = 0;
4194# endif
4195 for (unsigned iPT = 0, off = 0;
4196 iPT < RT_ELEMENTS(pPTDst->a);
4197 iPT++, off += PAGE_SIZE)
4198 {
4199 const SHWPTE PteDst = pPTDst->a[iPT];
4200
4201 /* skip not-present entries. */
4202 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4203 continue;
4204 Assert(PteDst.n.u1Present);
4205
4206 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4207 if (!PteSrc.n.u1Present)
4208 {
4209# ifdef IN_RING3
4210 PGMAssertHandlerAndFlagsInSync(pVM);
4211 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4212# endif
4213 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4214 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4215 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4216 cErrors++;
4217 continue;
4218 }
4219
4220 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4221# if 1 /** @todo sync accessed bit properly... */
4222 fIgnoreFlags |= X86_PTE_A;
4223# endif
4224
4225 /* match the physical addresses */
4226 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4227 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4228
4229# ifdef IN_RING3
4230 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4231 if (RT_FAILURE(rc))
4232 {
4233 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4234 {
4235 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4236 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4237 cErrors++;
4238 continue;
4239 }
4240 }
4241 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4242 {
4243 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4244 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4245 cErrors++;
4246 continue;
4247 }
4248# endif
4249
4250 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4251 if (!pPhysPage)
4252 {
4253# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4254 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4255 {
4256 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4257 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4258 cErrors++;
4259 continue;
4260 }
4261# endif
4262 if (PteDst.n.u1Write)
4263 {
4264 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4265 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4266 cErrors++;
4267 }
4268 fIgnoreFlags |= X86_PTE_RW;
4269 }
4270 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4271 {
4272 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4273 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4274 cErrors++;
4275 continue;
4276 }
4277
4278 /* flags */
4279 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4280 {
4281 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4282 {
4283 if (PteDst.n.u1Write)
4284 {
4285 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4286 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4287 cErrors++;
4288 continue;
4289 }
4290 fIgnoreFlags |= X86_PTE_RW;
4291 }
4292 else
4293 {
4294 if (PteDst.n.u1Present)
4295 {
4296 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4297 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4298 cErrors++;
4299 continue;
4300 }
4301 fIgnoreFlags |= X86_PTE_P;
4302 }
4303 }
4304 else
4305 {
4306 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4307 {
4308 if (PteDst.n.u1Write)
4309 {
4310 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4311 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4312 cErrors++;
4313 continue;
4314 }
4315 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4316 {
4317 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4318 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4319 cErrors++;
4320 continue;
4321 }
4322 if (PteDst.n.u1Dirty)
4323 {
4324 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4325 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4326 cErrors++;
4327 }
4328# if 0 /** @todo sync access bit properly... */
4329 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4330 {
4331 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4332 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4333 cErrors++;
4334 }
4335 fIgnoreFlags |= X86_PTE_RW;
4336# else
4337 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4338# endif
4339 }
4340 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4341 {
4342 /* access bit emulation (not implemented). */
4343 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4344 {
4345 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4346 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4347 cErrors++;
4348 continue;
4349 }
4350 if (!PteDst.n.u1Accessed)
4351 {
4352 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4353 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4354 cErrors++;
4355 }
4356 fIgnoreFlags |= X86_PTE_P;
4357 }
4358# ifdef DEBUG_sandervl
4359 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4360# endif
4361 }
4362
4363 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4364 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4365 )
4366 {
4367 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4368 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4369 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4370 cErrors++;
4371 continue;
4372 }
4373 } /* foreach PTE */
4374 }
4375 else
4376 {
4377 /*
4378 * Big Page.
4379 */
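 /* 2/4MB case: there is no guest page table, so each shadow PTE is checked
    against the guest PDE and the physical range it maps (GCPhysGst advances
    page by page). */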
4380 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4381 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4382 {
4383 if (PdeDst.n.u1Write)
4384 {
4385 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4386 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4387 cErrors++;
4388 continue;
4389 }
4390 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4391 {
4392 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4393 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4394 cErrors++;
4395 continue;
4396 }
4397# if 0 /** @todo sync access bit properly... */
4398 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4399 {
4400 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4401 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4402 cErrors++;
4403 }
4404 fIgnoreFlags |= X86_PTE_RW;
4405# else
4406 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4407# endif
4408 }
4409 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4410 {
4411 /* access bit emulation (not implemented). */
4412 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4413 {
4414 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4415 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4416 cErrors++;
4417 continue;
4418 }
4419 if (!PdeDst.n.u1Accessed)
4420 {
4421 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4422 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4423 cErrors++;
4424 }
4425 fIgnoreFlags |= X86_PTE_P;
4426 }
4427
4428 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4429 {
4430 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4431 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4432 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4433 cErrors++;
4434 }
4435
4436 /* iterate the page table. */
4437 for (unsigned iPT = 0, off = 0;
4438 iPT < RT_ELEMENTS(pPTDst->a);
4439 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4440 {
4441 const SHWPTE PteDst = pPTDst->a[iPT];
4442
4443 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4444 {
4445 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4446 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4447 cErrors++;
4448 }
4449
4450 /* skip not-present entries. */
4451 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4452 continue;
4453
4454 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4455
4456 /* match the physical addresses */
4457 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4458
4459# ifdef IN_RING3
4460 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4461 if (RT_FAILURE(rc))
4462 {
4463 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4464 {
4465 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4466 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4467 cErrors++;
4468 }
4469 }
4470 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4471 {
4472 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4473 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4474 cErrors++;
4475 continue;
4476 }
4477# endif
4478 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4479 if (!pPhysPage)
4480 {
4481# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4482 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4483 {
4484 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4485 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4486 cErrors++;
4487 continue;
4488 }
4489# endif
4490 if (PteDst.n.u1Write)
4491 {
4492 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4493 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4494 cErrors++;
4495 }
4496 fIgnoreFlags |= X86_PTE_RW;
4497 }
4498 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4499 {
4500 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4501 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4502 cErrors++;
4503 continue;
4504 }
4505
4506 /* flags */
4507 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4508 {
4509 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4510 {
4511 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4512 {
4513 if (PteDst.n.u1Write)
4514 {
4515 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4516 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4517 cErrors++;
4518 continue;
4519 }
4520 fIgnoreFlags |= X86_PTE_RW;
4521 }
4522 }
4523 else
4524 {
4525 if (PteDst.n.u1Present)
4526 {
4527 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4528 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4529 cErrors++;
4530 continue;
4531 }
4532 fIgnoreFlags |= X86_PTE_P;
4533 }
4534 }
4535
4536 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4537 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4538 )
4539 {
4540 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4541 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4542 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4543 cErrors++;
4544 continue;
4545 }
4546 } /* for each PTE */
4547 }
4548 }
4549 /* not present */
4550
4551 } /* for each PDE */
4552
4553 } /* for each PDPTE */
4554
4555 } /* for each PML4E */
4556
4557# ifdef DEBUG
4558 if (cErrors)
4559 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4560# endif
4561
4562#endif /* GST == 32BIT, PAE or AMD64 */
4563 return cErrors;
4564
4565#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4566}
4567#endif /* VBOX_STRICT */
4568
4569
4570/**
4571 * Sets up the CR3 for shadow paging
4572 *
4573 * @returns Strict VBox status code.
4574 * @retval VINF_SUCCESS.
4575 *
4576 * @param pVM VM handle.
4577 * @param GCPhysCR3 The physical address in the CR3 register.
4578 */
4579PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3)
4580{
4581 /* Update guest paging info. */
4582#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4583 || PGM_GST_TYPE == PGM_TYPE_PAE \
4584 || PGM_GST_TYPE == PGM_TYPE_AMD64
4585
4586 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4587
4588 /*
4589 * Map the page CR3 points at.
4590 */
4591 RTHCPTR HCPtrGuestCR3;
4592 RTHCPHYS HCPhysGuestCR3;
4593# ifdef VBOX_WITH_NEW_PHYS_CODE
4594 /** @todo this needs some reworking. current code is just a big hack. */
4595# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4596# if 1 /* temp hack */
4597 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4598 return VINF_PGM_SYNC_CR3;
4599# else
4600 AssertFailedReturn(VERR_INTERNAL_ERROR);
4601# endif
4602 int rc = VERR_INTERNAL_ERROR;
4603# else
4604 pgmLock(pVM);
4605 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4606 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4607 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4608 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4609 pgmUnlock(pVM);
4610# endif
4611# else /* !VBOX_WITH_NEW_PHYS_CODE */
4612 int rc = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhysCR3 & GST_CR3_PAGE_MASK, &HCPtrGuestCR3, &HCPhysGuestCR3);
4613# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4614 if (RT_SUCCESS(rc))
4615 {
4616 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4617 if (RT_SUCCESS(rc))
4618 {
4619# ifdef IN_RC
4620 PGM_INVL_PG(pVM->pgm.s.GCPtrCR3Mapping);
4621# endif
4622# if PGM_GST_TYPE == PGM_TYPE_32BIT
4623 pVM->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4624# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4625 pVM->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4626# endif
4627 pVM->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4628
4629# elif PGM_GST_TYPE == PGM_TYPE_PAE
4630 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4631 pVM->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4632# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4633 pVM->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4634# endif
4635 pVM->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4636 Log(("Cached mapping %RRv\n", pVM->pgm.s.pGstPaePdptRC));
4637
4638 /*
4639 * Map the 4 PDs too.
4640 */
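            /* Each present PDPT entry gets its page directory mapped into one of
               the four pages following the PDPT mapping; entries that are not
               present or fail to map are cleared so no stale pointers remain. */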
4641 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVM->pgm.s);
4642 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4643 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4644 {
4645 if (pGuestPDPT->a[i].n.u1Present)
4646 {
4647 RTHCPTR HCPtr;
4648 RTHCPHYS HCPhys;
4649 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4650# ifdef VBOX_WITH_NEW_PHYS_CODE
4651# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4652 AssertFailedReturn(VERR_INTERNAL_ERROR);
4653 int rc2 = VERR_INTERNAL_ERROR;
4654# else
4655 pgmLock(pVM);
4656 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4657 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4658 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4659 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4660 pgmUnlock(pVM);
4661# endif
4662# else /* !VBOX_WITH_NEW_PHYS_CODE */
4663 int rc2 = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
4664# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4665 if (RT_SUCCESS(rc2))
4666 {
4667 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4668 AssertRCReturn(rc, rc);
4669
4670 pVM->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4671# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4672 pVM->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4673# endif
4674 pVM->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4675 pVM->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4676 PGM_INVL_PG(GCPtr); /** @todo This ends up calling HWACCMInvalidatePage, is that correct? */
4677 continue;
4678 }
4679                    AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4680 }
4681
4682 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4683# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4684 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4685# endif
4686 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4687 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4688 PGM_INVL_PG(GCPtr); /** @todo this shouldn't be necessary? */
4689 }
4690
4691# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4692 pVM->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4693# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4694 pVM->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4695# endif
4696# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4697 if (!HWACCMIsNestedPagingActive(pVM))
4698 {
4699 /*
4700 * Update the shadow root page as well since that's not fixed.
4701 */
4702 /** @todo Move this into PGMAllBth.h. */
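                /* Free any previous root page before allocating a fresh PML4 root
                   for the new CR3 from the pool. */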
4703 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4704 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4705 {
4706 /* It might have been freed already by a pool flush (see e.g. PGMR3MappingsUnfix). */
4707 /** @todo Coordinate this better with the pool. */
4708 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3)->enmKind != PGMPOOLKIND_FREE)
4709 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), PGMPOOL_IDX_AMD64_CR3, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->GCPhys >> PAGE_SHIFT);
4710 pVM->pgm.s.pShwPageCR3R3 = 0;
4711 pVM->pgm.s.pShwPageCR3R0 = 0;
4712 pVM->pgm.s.pShwRootR3 = 0;
4713# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4714 pVM->pgm.s.pShwRootR0 = 0;
4715# endif
4716 pVM->pgm.s.HCPhysShwCR3 = 0;
4717 }
4718
4719 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4720 rc = pgmPoolAlloc(pVM, GCPhysCR3, PGMPOOLKIND_64BIT_PML4, PGMPOOL_IDX_AMD64_CR3, GCPhysCR3 >> PAGE_SHIFT, &pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4721 if (rc == VERR_PGM_POOL_FLUSHED)
4722 {
4723 Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
4724 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4725 return VINF_PGM_SYNC_CR3;
4726 }
4727 AssertRCReturn(rc, rc);
4728# ifdef IN_RING0
4729 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4730# else
4731 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4732# endif
4733 pVM->pgm.s.pShwRootR3 = (R3PTRTYPE(void *))pVM->pgm.s.CTX_SUFF(pShwPageCR3)->pvPageR3;
4734 Assert(pVM->pgm.s.pShwRootR3);
4735# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4736 pVM->pgm.s.pShwRootR0 = (R0PTRTYPE(void *))PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4737# endif
4738 pVM->pgm.s.HCPhysShwCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3)->Core.Key;
4739 rc = VINF_SUCCESS; /* clear it - pgmPoolAlloc returns hints. */
4740 }
4741# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
4742# endif
4743 }
4744 else
4745            AssertMsgFailed(("rc=%Rrc GCPhysCR3=%RGp\n", rc, GCPhysCR3));
4746 }
4747 else
4748        AssertMsgFailed(("rc=%Rrc GCPhysCR3=%RGp\n", rc, GCPhysCR3));
4749
4750#else /* prot/real stub */
4751 int rc = VINF_SUCCESS;
4752#endif
4753
4754#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4755 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4756# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4757 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4758 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4759 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4760 && PGM_GST_TYPE != PGM_TYPE_PROT))
4761
4762 Assert(!HWACCMIsNestedPagingActive(pVM));
4763
4764 /*
4765 * Update the shadow root page as well since that's not fixed.
4766 */
4767 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4768 PPGMPOOLPAGE pOldShwPageCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
4769 uint32_t iOldShwUserTable = pVM->pgm.s.iShwUserTable;
4770 uint32_t iOldShwUser = pVM->pgm.s.iShwUser;
4771 PPGMPOOLPAGE pNewShwPageCR3;
4772
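    /* The guest CR3 page frame number is stored in the 32-bit iShwUserTable field
       further down, hence this range assertion. */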
4773 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4774 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4775 if (rc == VERR_PGM_POOL_FLUSHED)
4776 {
4777 Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
4778 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4779 return VINF_PGM_SYNC_CR3;
4780 }
4781 AssertRCReturn(rc, rc);
4782 rc = VINF_SUCCESS;
4783
4784 /* Mark the page as locked; disallow flushing. */
4785 pgmPoolLockPage(pPool, pNewShwPageCR3);
4786
4787# ifdef IN_RC
4788    /* Note: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4789 bool fLog = VMMGCLogDisable(pVM);
4790# endif
4791
4792 pVM->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4793 pVM->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4794 pVM->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
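    /* CTX_SUFF() only updated the pointer for the current context; derive the
       pointers for the other two contexts from it. */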
4795# ifdef IN_RING0
4796 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4797 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4798# elif defined(IN_RC)
4799 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4800 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4801# else
4802 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4803 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4804# endif
4805
4806# ifndef PGM_WITHOUT_MAPPINGS
4807 /* Apply all hypervisor mappings to the new CR3.
4808 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4809 * make sure we check for conflicts in the new CR3 root.
4810 */
4811# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4812 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL) || VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4813# endif
4814 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4815 AssertRCReturn(rc, rc);
4816# endif
4817
4818 /* Set the current hypervisor CR3. */
4819 CPUMSetHyperCR3(pVM, PGMGetHyperCR3(pVM));
4820
4821# ifdef IN_RC
4822 VMMGCLogRestore(pVM, fLog);
4823# endif
4824
4825 /* Clean up the old CR3 root. */
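    /* Note that the old root is only unlocked and freed after the new root is in
       place and the hypervisor CR3 has been updated. */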
4826 if (pOldShwPageCR3)
4827 {
4828 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4829# ifndef PGM_WITHOUT_MAPPINGS
4830 /* Remove the hypervisor mappings from the shadow page table. */
4831 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4832# endif
4833 /* Mark the page as unlocked; allow flushing again. */
4834 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4835
4836 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4837 }
4838
4839# endif
4840#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
4841
4842 return rc;
4843}
4844
4845/**
4846 * Unmaps the guest CR3 page and releases the shadow CR3 root.
4847 *
4848 * @returns VBox status code, no special status codes.
4849 * @param pVM VM handle.
4850 */
4851PGM_BTH_DECL(int, UnmapCR3)(PVM pVM)
4852{
4853 LogFlow(("UnmapCR3\n"));
4854
4855 int rc = VINF_SUCCESS;
4856
4857 /* Update guest paging info. */
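    /* Invalidate the cached guest paging structure pointers for the old CR3 so
       nothing dereferences them after the unmap. */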
4858#if PGM_GST_TYPE == PGM_TYPE_32BIT
4859 pVM->pgm.s.pGst32BitPdR3 = 0;
4860#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4861 pVM->pgm.s.pGst32BitPdR0 = 0;
4862#endif
4863 pVM->pgm.s.pGst32BitPdRC = 0;
4864
4865#elif PGM_GST_TYPE == PGM_TYPE_PAE
4866 pVM->pgm.s.pGstPaePdptR3 = 0;
4867# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4868 pVM->pgm.s.pGstPaePdptR0 = 0;
4869# endif
4870 pVM->pgm.s.pGstPaePdptRC = 0;
4871 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4872 {
4873 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4874# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4875 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4876# endif
4877 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4878 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4879 }
4880
4881#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4882 pVM->pgm.s.pGstAmd64Pml4R3 = 0;
4883# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4884 pVM->pgm.s.pGstAmd64Pml4R0 = 0;
4885# endif
4886# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4887 if (!HWACCMIsNestedPagingActive(pVM))
4888 {
4889 pVM->pgm.s.pShwRootR3 = 0;
4890# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4891 pVM->pgm.s.pShwRootR0 = 0;
4892# endif
4893 pVM->pgm.s.HCPhysShwCR3 = 0;
4894 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4895 {
4896 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4897 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), PGMPOOL_IDX_AMD64_CR3, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->GCPhys >> PAGE_SHIFT);
4898 pVM->pgm.s.pShwPageCR3R3 = 0;
4899 pVM->pgm.s.pShwPageCR3R0 = 0;
4900 }
4901 }
4902# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
4903
4904#else /* prot/real mode stub */
4905 /* nothing to do */
4906#endif
4907
4908#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4909 /* Update shadow paging info. */
4910# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4911 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4912 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4913
4914# if PGM_GST_TYPE != PGM_TYPE_REAL
4915 Assert(!HWACCMIsNestedPagingActive(pVM));
4916# endif
4917
4918# ifndef PGM_WITHOUT_MAPPINGS
4919 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4920 /* Remove the hypervisor mappings from the shadow page table. */
4921 pgmMapDeactivateCR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4922# endif
4923
4924 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4925 {
4926 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4927
4928 Assert(pVM->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4929
4930 /* Mark the page as unlocked; allow flushing again. */
4931 pgmPoolUnlockPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4932
4933 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), pVM->pgm.s.iShwUser, pVM->pgm.s.iShwUserTable);
4934 pVM->pgm.s.pShwPageCR3R3 = 0;
4935 pVM->pgm.s.pShwPageCR3R0 = 0;
4936 pVM->pgm.s.pShwPageCR3RC = 0;
4937 pVM->pgm.s.iShwUser = 0;
4938 pVM->pgm.s.iShwUserTable = 0;
4939 }
4940# endif
4941#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY && !IN_RC */
4942
4943 return rc;
4944}
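/*
 * Illustrative sketch only (not part of the template): a caller would typically
 * reach MapCR3/UnmapCR3 through the per-mode function pointers, pairing them
 * roughly like this on a CR3 or paging-mode change. The surrounding caller and
 * the GCPhysNewCR3 variable are hypothetical; the status handling mirrors the
 * return values documented above.
 *
 *     rc = PGM_BTH_NAME(UnmapCR3)(pVM);                 // drop the old guest/shadow CR3 state
 *     if (RT_SUCCESS(rc))
 *         rc = PGM_BTH_NAME(MapCR3)(pVM, GCPhysNewCR3); // map the new guest CR3
 *     if (rc == VINF_PGM_SYNC_CR3)
 *         ;                                             // a full CR3 resync has been flagged for later
 */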
4945