VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 17561

Last change on this file since 17561 was 17561, checked in by vboxsync, 16 years ago

VBOX_WITH_PGMPOOL_PAGING_ONLY: doesn't apply here

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 213.0 KB
1/* $Id: PGMAllBth.h 17561 2009-03-09 10:31:28Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVM pVM);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 * @param pVM VM Handle.
78 * @param uErr The trap error code.
79 * @param pRegFrame Trap register frame.
80 * @param pvFault The fault address.
81 */
82PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
83{
84# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && defined(VBOX_STRICT)
85 PGMDynCheckLocks(pVM);
86# endif
87
88# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
89 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
90 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
91
92# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
93 /*
94 * Hide the instruction fetch trap indicator for now.
95 */
96 /** @todo NXE will change this and we must fix NXE in the switcher too! */
97 if (uErr & X86_TRAP_PF_ID)
98 {
99 uErr &= ~X86_TRAP_PF_ID;
100 TRPMSetErrorCode(pVM, uErr);
101 }
102# endif
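 /* For reference, uErr follows the standard x86 #PF error code layout:
  *   bit 0 (X86_TRAP_PF_P)  - 0 = page not present, 1 = protection violation
  *   bit 1 (X86_TRAP_PF_RW) - set if the access was a write
  *   bit 2 (X86_TRAP_PF_US) - set if the access came from user mode (CPL 3)
  *   bit 4 (X86_TRAP_PF_ID) - set for instruction fetches (requires NXE)
  * The ID bit is masked above because NXE isn't exposed by the switcher yet
  * for this shadow/guest combination (see the todo). */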
103
104 /*
105 * Get PDs.
106 */
107 int rc;
108# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
109# if PGM_GST_TYPE == PGM_TYPE_32BIT
110 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
111 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
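 /* A 32-bit guest has a single 1024-entry page directory; GST_PD_SHIFT resolves
  * to 22 here, so the PD index is simply the top 10 bits of the faulting address
  * (e.g. a fault at 0xC0101234 selects PDE 0x300 and, further down, PTE 0x101). */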
112
113# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
114
115# if PGM_GST_TYPE == PGM_TYPE_PAE
116 unsigned iPDSrc;
117# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120# else
121 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, NULL);
122# endif
123
124# elif PGM_GST_TYPE == PGM_TYPE_AMD64
125 unsigned iPDSrc;
126 PX86PML4E pPml4eSrc;
127 X86PDPE PdpeSrc;
128 PGSTPD pPDSrc;
129
130 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
131 Assert(pPml4eSrc);
132# endif
133
134 /* Quick check for a valid guest trap. (PAE & AMD64) */
135 if (!pPDSrc)
136 {
137# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
138 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
139# else
140 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
141# endif
142 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
143 TRPMSetErrorCode(pVM, uErr);
144 return VINF_EM_RAW_GUEST_TRAP;
145 }
146# endif
147
148# else /* !PGM_WITH_PAGING */
149 PGSTPD pPDSrc = NULL;
150 const unsigned iPDSrc = 0;
151# endif /* !PGM_WITH_PAGING */
152
153
154# if PGM_SHW_TYPE == PGM_TYPE_32BIT
155 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
156 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
157
158# elif PGM_SHW_TYPE == PGM_TYPE_PAE
159 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
160
161# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
162 PX86PDPAE pPDDst;
163# if PGM_GST_TYPE != PGM_TYPE_PAE
164 X86PDPE PdpeSrc;
165
166 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
167 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
168# endif
169 rc = pgmShwSyncPaePDPtr(pVM, pvFault, &PdpeSrc, &pPDDst);
170 if (rc != VINF_SUCCESS)
171 {
172 AssertRC(rc);
173 return rc;
174 }
175 Assert(pPDDst);
176
177# else
178 PX86PDPAE pPDDst = pgmShwGetPaePDPtr(&pVM->pgm.s, pvFault);
179
180 /* Did we mark the PDPT as not present in SyncCR3? */
181 unsigned iPdpt = (pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
182 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
183# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
184 if (!pPdptDst->a[iPdpt].n.u1Present)
185 pPdptDst->a[iPdpt].n.u1Present = 1;
186# endif
187# endif
188
189# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
190 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
191 PX86PDPAE pPDDst;
192# if PGM_GST_TYPE == PGM_TYPE_PROT
193 /* AMD-V nested paging */
194 X86PML4E Pml4eSrc;
195 X86PDPE PdpeSrc;
196 PX86PML4E pPml4eSrc = &Pml4eSrc;
197
198 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
199 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
200 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
201# endif
202
203 rc = pgmShwSyncLongModePDPtr(pVM, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
204 if (rc != VINF_SUCCESS)
205 {
206 AssertRC(rc);
207 return rc;
208 }
209 Assert(pPDDst);
210
211# elif PGM_SHW_TYPE == PGM_TYPE_EPT
212 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
213 PEPTPD pPDDst;
214
215 rc = pgmShwGetEPTPDPtr(pVM, pvFault, NULL, &pPDDst);
216 if (rc != VINF_SUCCESS)
217 {
218 AssertRC(rc);
219 return rc;
220 }
221 Assert(pPDDst);
222# endif
223
224# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
225 /*
226 * If we successfully correct the write protection fault due to dirty bit
227 * tracking, or this page fault is a genuine one, then return immediately.
228 */
229 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
230 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
231 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
232 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
233 || rc == VINF_EM_RAW_GUEST_TRAP)
234 {
235 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
236 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
237 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
238 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
239 }
240
241 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0ePD[iPDSrc]);
242# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
243
244 /*
245 * A common case is the not-present error caused by lazy page table syncing.
246 *
247 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
248 * so we can safely assume that the shadow PT is present when calling SyncPage later.
249 *
250 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
251 * of mapping conflict and defer to SyncCR3 in R3.
252 * (Again, we do NOT support access handlers for non-present guest pages.)
253 *
254 */
255# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
256 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
257# else
258 GSTPDE PdeSrc;
259 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
260 PdeSrc.n.u1Present = 1;
261 PdeSrc.n.u1Write = 1;
262 PdeSrc.n.u1Accessed = 1;
263 PdeSrc.n.u1User = 1;
264# endif
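 /* Without guest paging the PDE faked above is in effect P|RW|US|A, so all the
  * guest-PDE checks below trivially pass and the same code path can be shared
  * with the paged guest modes. */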
265 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
266 && !pPDDst->a[iPDDst].n.u1Present
267 && PdeSrc.n.u1Present
268 )
269
270 {
271 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2SyncPT; });
272 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
273 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
274 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, pvFault);
275 if (RT_SUCCESS(rc))
276 {
277 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
278 return rc;
279 }
280 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
281 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
282 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
283 return VINF_PGM_SYNC_CR3;
284 }
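 /* This is the lazy-sync fast path: the first fault in a PD range whose guest
  * PDE is present but whose shadow PDE is still empty lands here, SyncPT builds
  * the shadow page table, and the faulting instruction is simply restarted.
  * Subsequent faults in the same range are handled by SyncPage further down. */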
285
286# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
287 /*
288 * Check if this address is within any of our mappings.
289 *
290 * This is *very* fast and it's gonna save us a bit of effort below and prevent
291 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
292 * (BTW, it's impossible to have physical access handlers in a mapping.)
293 */
294 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
295 {
296 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
297 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
298 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
299 {
300 if (pvFault < pMapping->GCPtr)
301 break;
302 if (pvFault - pMapping->GCPtr < pMapping->cb)
303 {
304 /*
305 * The first thing we check is if we've got an undetected conflict.
306 */
307 if (!pVM->pgm.s.fMappingsFixed)
308 {
309 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
310 while (iPT-- > 0)
311 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
312 {
313 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eConflicts);
314 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
315 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
316 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
317 return VINF_PGM_SYNC_CR3;
318 }
319 }
320
321 /*
322 * Check if the fault address is in a virtual page access handler range.
323 */
324 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
325 if ( pCur
326 && pvFault - pCur->Core.Key < pCur->cb
327 && uErr & X86_TRAP_PF_RW)
328 {
329# ifdef IN_RC
330 STAM_PROFILE_START(&pCur->Stat, h);
331 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
332 STAM_PROFILE_STOP(&pCur->Stat, h);
333# else
334 AssertFailed();
335 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
336# endif
337 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersMapping);
338 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
339 return rc;
340 }
341
342 /*
343 * Pretend we're not here and let the guest handle the trap.
344 */
345 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
346 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFMapping);
347 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
348 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
349 return VINF_EM_RAW_GUEST_TRAP;
350 }
351 }
352 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
353 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
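 /* Note: the mappings checked above are the VMM's own guest-context mappings
  * (e.g. the MMIO2/VRam mapping mentioned in the comment); in ring-0 with
  * VT-x/AMD-V they don't exist at all, see PGM_WITHOUT_MAPPINGS above. */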
354# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
355
356 /*
357 * Check if this fault address is flagged for special treatment,
358 * which means we'll have to figure out the physical address and
359 * check flags associated with it.
360 *
361 * ASSUME that we can limit any special access handling to pages
362 * in page tables which the guest believes to be present.
363 */
364 if (PdeSrc.n.u1Present)
365 {
366 RTGCPHYS GCPhys = NIL_RTGCPHYS;
367
368# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
369# if PGM_GST_TYPE == PGM_TYPE_AMD64
370 bool fBigPagesSupported = true;
371# else
372 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
373# endif
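 /* For a big (2/4MB) page the guest physical address comes straight from the
  * PDE: the PDE supplies the big-page base and the fault address supplies the
  * 4KB page index within it (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK keeps
  * exactly those middle bits). Otherwise the guest PT has to be walked below. */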
374 if ( PdeSrc.b.u1Size
375 && fBigPagesSupported)
376 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
377 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
378 else
379 {
380 PGSTPT pPTSrc;
381 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
382 if (RT_SUCCESS(rc))
383 {
384 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
385 if (pPTSrc->a[iPTESrc].n.u1Present)
386 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
387 }
388 }
389# else
390 /* No paging so the fault address is the physical address */
391 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
392# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
393
394 /*
395 * If we have a GC address we'll check if it has any flags set.
396 */
397 if (GCPhys != NIL_RTGCPHYS)
398 {
399 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
400
401 PPGMPAGE pPage;
402 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
403 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
404 {
405 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
406 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
407 {
408 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
409 {
410 /*
411 * Physical page access handler.
412 */
413 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
414 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
415 if (pCur)
416 {
417# ifdef PGM_SYNC_N_PAGES
418 /*
419 * If the region is write protected and we got a page not present fault, then sync
420 * the pages. If the fault was caused by a read, then restart the instruction.
421 * In case of write access continue to the GC write handler.
422 *
423 * ASSUMES that there is only one handler per page or that they have similar write properties.
424 */
425 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
426 && !(uErr & X86_TRAP_PF_P))
427 {
428 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
429 if ( RT_FAILURE(rc)
430 || !(uErr & X86_TRAP_PF_RW)
431 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
432 {
433 AssertRC(rc);
434 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
435 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
436 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
437 return rc;
438 }
439 }
440# endif
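 /* Note on the PGM_SYNC_NR_PAGES call above: SyncPage brings in a small window
  * of PTEs around the faulting address in one go, so scanning a write-protected
  * region doesn't cost one #PF per 4KB page; a pure read that only faulted
  * because the page wasn't synced yet is then just restarted. */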
441
442 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
443 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
444 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
445
446# if defined(IN_RC) || defined(IN_RING0)
447 if (pCur->CTX_SUFF(pfnHandler))
448 {
449 STAM_PROFILE_START(&pCur->Stat, h);
450 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
451 STAM_PROFILE_STOP(&pCur->Stat, h);
452 }
453 else
454# endif
455 rc = VINF_EM_RAW_EMULATE_INSTR;
456 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersPhysical);
457 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
458 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndPhys; });
459 return rc;
460 }
461 }
462# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
463 else
464 {
465# ifdef PGM_SYNC_N_PAGES
466 /*
467 * If the region is write protected and we got a page not present fault, then sync
468 * the pages. If the fault was caused by a read, then restart the instruction.
469 * In case of write access continue to the GC write handler.
470 */
471 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
472 && !(uErr & X86_TRAP_PF_P))
473 {
474 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
475 if ( RT_FAILURE(rc)
476 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
477 || !(uErr & X86_TRAP_PF_RW))
478 {
479 AssertRC(rc);
480 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
481 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
482 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
483 return rc;
484 }
485 }
486# endif
487 /*
488 * Ok, it's a virtual page access handler.
489 *
490 * Since it's faster to search by address, we'll do that first
491 * and then retry by GCPhys if that fails.
492 */
493 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
494 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
495 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
496 */
497 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
498 if (pCur)
499 {
500 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
501 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
502 || !(uErr & X86_TRAP_PF_P)
503 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
504 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
505
506 if ( pvFault - pCur->Core.Key < pCur->cb
507 && ( uErr & X86_TRAP_PF_RW
508 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
509 {
510# ifdef IN_RC
511 STAM_PROFILE_START(&pCur->Stat, h);
512 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
513 STAM_PROFILE_STOP(&pCur->Stat, h);
514# else
515 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
516# endif
517 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtual);
518 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
519 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
520 return rc;
521 }
522 /* Unhandled part of a monitored page */
523 }
524 else
525 {
526 /* Check by physical address. */
527 PPGMVIRTHANDLER pCur;
528 unsigned iPage;
529 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
530 &pCur, &iPage);
531 Assert(RT_SUCCESS(rc) || !pCur);
532 if ( pCur
533 && ( uErr & X86_TRAP_PF_RW
534 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
535 {
536 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
537# ifdef IN_RC
538 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
539 Assert(off < pCur->cb);
540 STAM_PROFILE_START(&pCur->Stat, h);
541 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
542 STAM_PROFILE_STOP(&pCur->Stat, h);
543# else
544 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
545# endif
546 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
547 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
548 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
549 return rc;
550 }
551 }
552 }
553# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
554
555 /*
556 * There is a handled area of the page, but this fault doesn't belong to it.
557 * We must emulate the instruction.
558 *
559 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
560 * we first check if this was a page-not-present fault for a page with only
561 * write access handlers. Restart the instruction if it wasn't a write access.
562 */
563 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersUnhandled);
564
565 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
566 && !(uErr & X86_TRAP_PF_P))
567 {
568 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
569 if ( RT_FAILURE(rc)
570 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
571 || !(uErr & X86_TRAP_PF_RW))
572 {
573 AssertRC(rc);
574 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
575 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
576 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
577 return rc;
578 }
579 }
580
581 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stage of kernel booting in Ubuntu 6.06,
582 * which writes to an unhandled part of the LDT page several million times.
583 */
584 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
585 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
586 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
587 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndUnhandled; });
588 return rc;
589 } /* if any kind of handler */
590
591# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
592 if (uErr & X86_TRAP_PF_P)
593 {
594 /*
595 * The page isn't marked, but it might still be monitored by a virtual page access handler.
596 * (ASSUMES no temporary disabling of virtual handlers.)
597 */
598 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
599 * we should correct both the shadow page table and physical memory flags, and not only check for
600 * accesses within the handler region but for access to pages with virtual handlers. */
601 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
602 if (pCur)
603 {
604 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
605 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
606 || !(uErr & X86_TRAP_PF_P)
607 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
608 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
609
610 if ( pvFault - pCur->Core.Key < pCur->cb
611 && ( uErr & X86_TRAP_PF_RW
612 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
613 {
614# ifdef IN_RC
615 STAM_PROFILE_START(&pCur->Stat, h);
616 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
617 STAM_PROFILE_STOP(&pCur->Stat, h);
618# else
619 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
620# endif
621 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
622 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
623 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
624 return rc;
625 }
626 }
627 }
628# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
629 }
630 else
631 {
632 /*
633 * When the guest accesses invalid physical memory (e.g. probing
634 * of RAM or accessing a remapped MMIO range), then we'll fall
635 * back to the recompiler to emulate the instruction.
636 */
637 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
638 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersInvalid);
639 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
640 return VINF_EM_RAW_EMULATE_INSTR;
641 }
642
643 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
644
645# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
646 /*
647 * We are here only if page is present in Guest page tables and
648 * trap is not handled by our handlers.
649 *
650 * Check it for page out-of-sync situation.
651 */
652 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
653
654 if (!(uErr & X86_TRAP_PF_P))
655 {
656 /*
657 * Page is not present in our page tables.
658 * Try to sync it!
659 * BTW, fPageShw is invalid in this branch!
660 */
661 if (uErr & X86_TRAP_PF_US)
662 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
663 else /* supervisor */
664 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
665
666# if defined(LOG_ENABLED) && !defined(IN_RING0)
667 RTGCPHYS GCPhys;
668 uint64_t fPageGst;
669 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
670 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
671 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
672# endif /* LOG_ENABLED */
673
674# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
675 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
676 {
677 uint64_t fPageGst;
678 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
679 if ( RT_SUCCESS(rc)
680 && !(fPageGst & X86_PTE_US))
681 {
682 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
683 if ( pvFault == (RTGCPTR)pRegFrame->eip
684 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
685# ifdef CSAM_DETECT_NEW_CODE_PAGES
686 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
687 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
688# endif /* CSAM_DETECT_NEW_CODE_PAGES */
689 )
690 {
691 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
692 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
693 if (rc != VINF_SUCCESS)
694 {
695 /*
696 * CSAM needs to perform a job in ring 3.
697 *
698 * Sync the page before going to the host context; otherwise we'll end up in a loop if
699 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
700 */
701 LogFlow(("CSAM ring 3 job\n"));
702 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
703 AssertRC(rc2);
704
705 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
706 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2CSAM; });
707 return rc;
708 }
709 }
710# ifdef CSAM_DETECT_NEW_CODE_PAGES
711 else if ( uErr == X86_TRAP_PF_RW
712 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
713 && pRegFrame->ecx < 0x10000)
714 {
715 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
716 * to detect loading of new code pages.
717 */
718
719 /*
720 * Decode the instruction.
721 */
722 RTGCPTR PC;
723 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
724 if (rc == VINF_SUCCESS)
725 {
726 DISCPUSTATE Cpu;
727 uint32_t cbOp;
728 rc = EMInterpretDisasOneEx(pVM, PC, pRegFrame, &Cpu, &cbOp);
729
730 /* For now we'll restrict this to rep movsw/d instructions */
731 if ( rc == VINF_SUCCESS
732 && Cpu.pCurInstr->opcode == OP_MOVSWD
733 && (Cpu.prefix & PREFIX_REP))
734 {
735 CSAMMarkPossibleCodePage(pVM, pvFault);
736 }
737 }
738 }
739# endif /* CSAM_DETECT_NEW_CODE_PAGES */
740
741 /*
742 * Mark this page as safe.
743 */
744 /** @todo not correct for pages that contain both code and data!! */
745 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
746 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
747 }
748 }
749# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
750 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
751 if (RT_SUCCESS(rc))
752 {
753 /* The page was successfully synced, return to the guest. */
754 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
755 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSync; });
756 return VINF_SUCCESS;
757 }
758 }
759 else /* uErr & X86_TRAP_PF_P: */
760 {
761 /*
762 * Write protected pages are made writable when the guest makes the first
763 * write to them. This happens for pages that are shared, write monitored
764 * and not yet allocated.
765 *
766 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
767 * to physically monitored regions that are no longer valid.
768 * Assume for now it only applies to the read/write flag.
769 */
770 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
771 {
772# ifdef VBOX_WITH_NEW_PHYS_CODE
773 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
774 {
775 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
776 GCPhys, pPage, pvFault, uErr));
777 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
778 if (rc != VINF_SUCCESS)
779 {
780 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
781 return rc;
782 }
783 }
784 /// @todo count the above case; else
785# endif /* VBOX_WITH_NEW_PHYS_CODE */
786 if (uErr & X86_TRAP_PF_US)
787 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
788 else /* supervisor */
789 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
790
791 /*
792 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
793 * page is not present, which is not true in this case.
794 */
795 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
796 if (RT_SUCCESS(rc))
797 {
798 /*
799 * Page was successfully synced, return to guest.
800 */
801# ifdef VBOX_STRICT
802 RTGCPHYS GCPhys;
803 uint64_t fPageGst;
804 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
805 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
806 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
807
808 uint64_t fPageShw;
809 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
810 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
811# endif /* VBOX_STRICT */
812 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
813 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
814 return VINF_SUCCESS;
815 }
816
817 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
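 /* Background: with CR0.WP clear a CPL-0 write may legally hit a read-only
  * page, but the shadow PTE mirrors the guest R/W bit and therefore still
  * faults. Interpreting the single instruction is the simplest way to honour
  * the guest's WP=0 semantics here. */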
818 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
819 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
820 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
821 {
822 uint64_t fPageGst;
823 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
824 if ( RT_SUCCESS(rc)
825 && !(fPageGst & X86_PTE_RW))
826 {
827 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
828 if (RT_SUCCESS(rc))
829 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulInRZ);
830 else
831 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulToR3);
832 return rc;
833 }
834 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
835 }
836 }
837
838# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
839# ifdef VBOX_STRICT
840 /*
841 * Check for VMM page flags vs. Guest page flags consistency.
842 * Currently only for debug purposes.
843 */
844 if (RT_SUCCESS(rc))
845 {
846 /* Get guest page flags. */
847 uint64_t fPageGst;
848 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
849 if (RT_SUCCESS(rc))
850 {
851 uint64_t fPageShw;
852 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
853
854 /*
855 * Compare page flags.
856 * Note: we have AVL, A, D bits desynched.
857 */
858 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
859 ("Page flags mismatch! pvFault=%RGv GCPhys=%RGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
860 }
861 else
862 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
863 }
864 else
865 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
866# endif /* VBOX_STRICT */
867# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
868 }
869 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
870# endif /* PGM_OUT_OF_SYNC_IN_GC */
871 }
872 else /* GCPhys == NIL_RTGCPHYS */
873 {
874 /*
875 * Page not present in Guest OS or invalid page table address.
876 * This is potential virtual page access handler food.
877 *
878 * For the present we'll say that our access handlers don't
879 * work for this case - we've already discarded the page table
880 * not present case which is identical to this.
881 *
882 * When we perchance find we need this, we will probably have AVL
883 * trees (offset based) to operate on and we can measure their speed
884 * against mapping a page table and probably rearrange this handling
885 * a bit. (Like, searching virtual ranges before checking the
886 * physical address.)
887 */
888 }
889 }
890 /* else: !present (guest) */
891
892
893# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
894 /*
895 * Conclusion, this is a guest trap.
896 */
897 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
898 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFUnh);
899 return VINF_EM_RAW_GUEST_TRAP;
900# else
901 /* present, but not a monitored page; perhaps the guest is probing physical memory */
902 return VINF_EM_RAW_EMULATE_INSTR;
903# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
904
905
906# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
907
908 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
909 return VERR_INTERNAL_ERROR;
910# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
911}
912#endif /* !IN_RING3 */
913
914
915/**
916 * Emulation of the invlpg instruction.
917 *
918 *
919 * @returns VBox status code.
920 *
921 * @param pVM VM handle.
922 * @param GCPtrPage Page to invalidate.
923 *
924 * @remark ASSUMES that the guest is updating before invalidating. This order
925 * isn't required by the CPU, so this is speculative and could cause
926 * trouble.
927 *
928 * @todo Flush page or page directory only if necessary!
929 * @todo Add a #define for simply invalidating the page.
930 */
931PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage)
932{
933#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
934 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
935 && PGM_SHW_TYPE != PGM_TYPE_EPT
936 int rc;
937
938 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
939 /*
940 * Get the shadow PD entry and skip out if this PD isn't present.
941 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
942 */
943# if PGM_SHW_TYPE == PGM_TYPE_32BIT
944 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
945 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
946
947# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
948 /* Fetch the pgm pool shadow descriptor. */
949 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
950 Assert(pShwPde);
951# endif
952
953# elif PGM_SHW_TYPE == PGM_TYPE_PAE
954 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
955 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
956
957 /* If the shadow PDPE isn't present, then skip the invalidate. */
958 if (!pPdptDst->a[iPdpt].n.u1Present)
959 {
960 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
961 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
962 return VINF_SUCCESS;
963 }
964
965# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
966 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
967 PPGMPOOLPAGE pShwPde;
968 PX86PDPAE pPDDst;
969
970 /* Fetch the pgm pool shadow descriptor. */
971 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
972 AssertRCSuccessReturn(rc, rc);
973 Assert(pShwPde);
974
975 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
976 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
977# else
978 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - pool index only atm! */;
979 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
980# endif
981
982# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
983 /* PML4 */
984# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
985 AssertReturn(pVM->pgm.s.pShwRootR3, VERR_INTERNAL_ERROR);
986# endif
987
988 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
989 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
990 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
991 PX86PDPAE pPDDst;
992 PX86PDPT pPdptDst;
993 PX86PML4E pPml4eDst;
994 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
995 if (rc != VINF_SUCCESS)
996 {
997 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
998 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
999 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
1000 PGM_INVL_GUEST_TLBS();
1001 return VINF_SUCCESS;
1002 }
1003 Assert(pPDDst);
1004
1005 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1006 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1007
1008 if (!pPdpeDst->n.u1Present)
1009 {
1010 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1011 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
1012 PGM_INVL_GUEST_TLBS();
1013 return VINF_SUCCESS;
1014 }
1015
1016# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1017
1018 const SHWPDE PdeDst = *pPdeDst;
1019 if (!PdeDst.n.u1Present)
1020 {
1021 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1022 return VINF_SUCCESS;
1023 }
1024
1025 /*
1026 * Get the guest PD entry and calc big page.
1027 */
1028# if PGM_GST_TYPE == PGM_TYPE_32BIT
1029 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
1030 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1031 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1032# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1033 unsigned iPDSrc;
1034# if PGM_GST_TYPE == PGM_TYPE_PAE
1035 X86PDPE PdpeSrc;
1036 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1037# else /* AMD64 */
1038 PX86PML4E pPml4eSrc;
1039 X86PDPE PdpeSrc;
1040 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1041# endif
1042 GSTPDE PdeSrc;
1043
1044 if (pPDSrc)
1045 PdeSrc = pPDSrc->a[iPDSrc];
1046 else
1047 PdeSrc.u = 0;
1048# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1049
1050# if PGM_GST_TYPE == PGM_TYPE_AMD64
1051 const bool fIsBigPage = PdeSrc.b.u1Size;
1052# else
1053 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1054# endif
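 /* 4MB (2MB in PAE) guest pages only exist when CR4.PSE is set; in AMD64 long
  * mode big pages are architecturally always available, hence the unconditional
  * form above. */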
1055
1056# ifdef IN_RING3
1057 /*
1058 * If a CR3 Sync is pending we may ignore the invalidate page operation
1059 * depending on the kind of sync and if it's a global page or not.
1060 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1061 */
1062# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1063 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
1064 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
1065 && fIsBigPage
1066 && PdeSrc.b.u1Global
1067 )
1068 )
1069# else
1070 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1071# endif
1072 {
1073 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1074 return VINF_SUCCESS;
1075 }
1076# endif /* IN_RING3 */
1077
1078# if PGM_GST_TYPE == PGM_TYPE_AMD64
1079 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1080
1081 /* Fetch the pgm pool shadow descriptor. */
1082 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
1083 Assert(pShwPdpt);
1084
1085 /* Fetch the pgm pool shadow descriptor. */
1086 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1087 Assert(pShwPde);
1088
1089 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1090 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1091
1092 if ( !pPml4eSrc->n.u1Present
1093 || pShwPdpt->GCPhys != GCPhysPdpt)
1094 {
1095 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1096 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1097 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1098 pPml4eDst->u = 0;
1099 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1100 PGM_INVL_GUEST_TLBS();
1101 return VINF_SUCCESS;
1102 }
1103 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1104 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1105 {
1106 /*
1107 * Mark not present so we can resync the PML4E when it's used.
1108 */
1109 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1110 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1111 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1112 pPml4eDst->u = 0;
1113 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1114 PGM_INVL_GUEST_TLBS();
1115 }
1116 else if (!pPml4eSrc->n.u1Accessed)
1117 {
1118 /*
1119 * Mark not present so we can set the accessed bit.
1120 */
1121 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1122 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1123 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1124 pPml4eDst->u = 0;
1125 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1126 PGM_INVL_GUEST_TLBS();
1127 }
1128
1129 /* Check if the PDPT entry has changed. */
1130 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1131 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1132 if ( !PdpeSrc.n.u1Present
1133 || pShwPde->GCPhys != GCPhysPd)
1134 {
1135 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1136 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1137 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1138 pPdpeDst->u = 0;
1139 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1140 PGM_INVL_GUEST_TLBS();
1141 return VINF_SUCCESS;
1142 }
1143 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1144 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1145 {
1146 /*
1147 * Mark not present so we can resync the PDPTE when it's used.
1148 */
1149 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1150 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1151 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1152 pPdpeDst->u = 0;
1153 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1154 PGM_INVL_GUEST_TLBS();
1155 }
1156 else if (!PdpeSrc.lm.u1Accessed)
1157 {
1158 /*
1159 * Mark not present so we can set the accessed bit.
1160 */
1161 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1162 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1163 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1164 pPdpeDst->u = 0;
1165 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1166 PGM_INVL_GUEST_TLBS();
1167 }
1168# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1169
1170# if PGM_GST_TYPE == PGM_TYPE_PAE && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1171 /*
1172 * Update the shadow PDPE and free all the shadow PD entries if the PDPE is marked not present.
1173 * Note: This shouldn't actually be necessary as we monitor the PDPT page for changes.
1174 */
1175 if (!pPDSrc)
1176 {
1177 /* Guest PDPE not present */
1178 PX86PDPAE pPDDst = pgmShwGetPaePDPtr(&pVM->pgm.s, GCPtrPage);
1179 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1180
1181 Assert(!PdpeSrc.n.u1Present);
1182 LogFlow(("InvalidatePage: guest PDPE %d not present; clear shw pdpe\n", iPdpt));
1183
1184 /* for each page directory entry */
1185 for (unsigned iPD = 0; iPD < X86_PG_PAE_ENTRIES; iPD++)
1186 {
1187 if ( pPDDst->a[iPD].n.u1Present
1188 && !(pPDDst->a[iPD].u & PGM_PDFLAGS_MAPPING))
1189 {
1190 pgmPoolFree(pVM, pPDDst->a[iPD].u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdpt * X86_PG_PAE_ENTRIES + iPD);
1191 pPDDst->a[iPD].u = 0;
1192 }
1193 }
1194 if (!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING))
1195 pPdptDst->a[iPdpt].n.u1Present = 0;
1196 PGM_INVL_GUEST_TLBS();
1197 }
1198 AssertMsg(pVM->pgm.s.fMappingsFixed || (PdpeSrc.u & X86_PDPE_PG_MASK) == pVM->pgm.s.aGCPhysGstPaePDsMonitored[iPdpt], ("%RGp vs %RGp (mon)\n", (PdpeSrc.u & X86_PDPE_PG_MASK), pVM->pgm.s.aGCPhysGstPaePDsMonitored[iPdpt]));
1199# endif
1200
1201
1202 /*
1203 * Deal with the Guest PDE.
1204 */
1205 rc = VINF_SUCCESS;
1206 if (PdeSrc.n.u1Present)
1207 {
1208 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1209 {
1210 /*
1211 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1212 */
1213 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1214 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1215 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1216 }
1217 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1218 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1219 {
1220 /*
1221 * Mark not present so we can resync the PDE when it's used.
1222 */
1223 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1224 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1225# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1226 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1227# else
1228 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1229# endif
1230 pPdeDst->u = 0;
1231 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1232 PGM_INVL_GUEST_TLBS();
1233 }
1234 else if (!PdeSrc.n.u1Accessed)
1235 {
1236 /*
1237 * Mark not present so we can set the accessed bit.
1238 */
1239 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1240 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1241# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1242 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1243# else
1244 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1245# endif
1246 pPdeDst->u = 0;
1247 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1248 PGM_INVL_GUEST_TLBS();
1249 }
1250 else if (!fIsBigPage)
1251 {
1252 /*
1253 * 4KB - page.
1254 */
1255 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1256 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1257# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1258 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1259 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1260# endif
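 /* A 32-bit guest PT maps 4MB with 1024 entries while a PAE shadow PT maps only
  * 2MB with 512 entries, so each guest PT is shadowed by two pool pages; the
  * low bit of the shadow PD index selects the half, and GCPhys is advanced by
  * PAGE_SIZE/2 (the second 512 guest PTEs) for the upper half. */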
1261 if (pShwPage->GCPhys == GCPhys)
1262 {
1263# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1264 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1265 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1266 if (pPT->a[iPTEDst].n.u1Present)
1267 {
1268# ifdef PGMPOOL_WITH_USER_TRACKING
1269 /* This is very unlikely with caching/monitoring enabled. */
1270 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1271# endif
1272 pPT->a[iPTEDst].u = 0;
1273 }
1274# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1275 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1276 if (RT_SUCCESS(rc))
1277 rc = VINF_SUCCESS;
1278# endif
1279 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1280 PGM_INVL_PG(GCPtrPage);
1281 }
1282 else
1283 {
1284 /*
1285 * The page table address changed.
1286 */
1287 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1288 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1289# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1290 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1291# else
1292 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1293# endif
1294 pPdeDst->u = 0;
1295 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1296 PGM_INVL_GUEST_TLBS();
1297 }
1298 }
1299 else
1300 {
1301 /*
1302 * 2/4MB - page.
1303 */
1304 /* Before freeing the page, check if anything really changed. */
1305 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1306 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1307# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1308 /* Select the right PDE as we're emulating a 4MB page directory with two 2MB shadow PDEs. */
1309 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1310# endif
1311 if ( pShwPage->GCPhys == GCPhys
1312 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1313 {
1314 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1315 /** @todo PAT */
1316 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1317 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1318 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1319 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1320 {
1321 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1322 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1323 return VINF_SUCCESS;
1324 }
1325 }
1326
1327 /*
1328 * Ok, the page table is present and it's been changed in the guest.
1329 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1330 * We could do this for some flushes in GC too, but we need an algorithm for
1331 * deciding which 4MB pages containing code likely to be executed very soon.
1332 */
1333 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1334 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1335# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1336 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1337# else
1338 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1339# endif
1340 pPdeDst->u = 0;
1341 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1342 PGM_INVL_BIG_PG(GCPtrPage);
1343 }
1344 }
1345 else
1346 {
1347 /*
1348 * Page directory is not present, mark shadow PDE not present.
1349 */
1350 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1351 {
1352# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1353 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1354# else
1355 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1356# endif
1357 pPdeDst->u = 0;
1358 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1359 PGM_INVL_PG(GCPtrPage);
1360 }
1361 else
1362 {
1363 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1364 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1365 }
1366 }
1367
1368 return rc;
1369
1370#else /* guest real and protected mode */
1371 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1372 return VINF_SUCCESS;
1373#endif
1374}
1375
1376
1377#ifdef PGMPOOL_WITH_USER_TRACKING
1378/**
1379 * Update the tracking of shadowed pages.
1380 *
1381 * @param pVM The VM handle.
1382 * @param pShwPage The shadow page.
1383 * @param HCPhys The physical page that is being dereferenced.
1384 */
1385DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1386{
1387# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1388 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1389 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1390
1391 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1392 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1393 * 2. write protect all shadowed pages. I.e. implement caching.
1394 */
1395 /*
1396 * Find the guest address.
1397 */
1398 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1399 pRam;
1400 pRam = pRam->CTX_SUFF(pNext))
1401 {
1402 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1403 while (iPage-- > 0)
1404 {
1405 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1406 {
1407 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1408 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1409 pShwPage->cPresent--;
1410 pPool->cPresent--;
1411 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1412 return;
1413 }
1414 }
1415 }
1416
1417 for (;;)
1418 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1419# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1420 pShwPage->cPresent--;
1421 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1422# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1423}
1424
1425
1426/**
1427 * Update the tracking of shadowed pages.
1428 *
1429 * @param pVM The VM handle.
1430 * @param pShwPage The shadow page.
1431 * @param u16 The top 16 bits of pPage->HCPhys.
1432 * @param pPage Pointer to the guest page. This will be modified.
1433 * @param iPTDst The index into the shadow table.
1434 */
1435DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1436{
1437# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1438 /*
1439 * Just deal with the simple first time here.
1440 */
1441 if (!u16)
1442 {
1443 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1444 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1445 }
1446 else
1447 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
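 /* The 16-bit tracking word roughly packs a reference count together with the
  * index of the shadow page table that maps this guest page; once a second
  * shadow PT references the same page, pgmPoolTrackPhysExtAddref moves the
  * bookkeeping into an extension record. */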
1448
1449 /* write back */
1450 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1451 PGM_PAGE_SET_TRACKING(pPage, u16);
1452
1453# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1454
1455 /* update statistics. */
1456 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1457 pShwPage->cPresent++;
1458 if (pShwPage->iFirstPresent > iPTDst)
1459 pShwPage->iFirstPresent = iPTDst;
1460}
1461#endif /* PGMPOOL_WITH_USER_TRACKING */
1462
1463
1464/**
1465 * Creates a 4K shadow page for a guest page.
1466 *
1467 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1468 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1469 * will be mapped in this function.
1470 *
1471 * @param pVM VM handle.
1472 * @param pPteDst Destination page table entry.
1473 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1474 * Can safely assume that only the flags are being used.
1475 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1476 * @param pShwPage Pointer to the shadow page.
1477 * @param iPTDst The index into the shadow table.
1478 *
1479 * @remark Not used for 2/4MB pages!
1480 */
1481DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1482{
1483 if (PteSrc.n.u1Present)
1484 {
1485 /*
1486 * Find the ram range.
1487 */
1488 PPGMPAGE pPage;
1489 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1490 if (RT_SUCCESS(rc))
1491 {
1492#ifdef VBOX_WITH_NEW_PHYS_CODE
1493# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1494 /* Try make the page writable if necessary. */
1495 if ( PteSrc.n.u1Write
1496 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1497 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1498 {
1499 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1500 AssertRC(rc);
1501 }
1502# endif
1503#endif
1504
1505 /** @todo investigate PWT, PCD and PAT. */
1506 /*
1507 * Make page table entry.
1508 */
1509 SHWPTE PteDst;
1510 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1511 {
1512 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1513 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1514 {
1515#if PGM_SHW_TYPE == PGM_TYPE_EPT
1516 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1517 PteDst.n.u1Present = 1;
1518 PteDst.n.u1Execute = 1;
1519 PteDst.n.u1IgnorePAT = 1;
1520 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1521 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1522#else
1523 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1524 | PGM_PAGE_GET_HCPHYS(pPage);
1525#endif
1526 }
1527 else
1528 {
1529 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1530 PteDst.u = 0;
1531 }
1532 /** @todo count these two kinds. */
1533 }
1534 else
1535 {
1536#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1537 /*
1538 * If the page or page directory entry is not marked accessed,
1539 * we mark the page not present.
1540 */
1541 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1542 {
1543 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1544 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1545 PteDst.u = 0;
1546 }
1547 else
1548 /*
1549 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1550 * when the page is modified.
1551 */
1552 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1553 {
1554 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1555 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1556 | PGM_PAGE_GET_HCPHYS(pPage)
1557 | PGM_PTFLAGS_TRACK_DIRTY;
1558 }
1559 else
1560#endif
1561 {
1562 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1563#if PGM_SHW_TYPE == PGM_TYPE_EPT
1564 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1565 PteDst.n.u1Present = 1;
1566 PteDst.n.u1Write = 1;
1567 PteDst.n.u1Execute = 1;
1568 PteDst.n.u1IgnorePAT = 1;
1569 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1570 /* PteDst.n.u1Size = 0 */
1571#else
1572 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1573 | PGM_PAGE_GET_HCPHYS(pPage);
1574#endif
1575 }
1576 }
1577
1578#ifdef VBOX_WITH_NEW_PHYS_CODE
1579 /*
1580 * Make sure only allocated pages are mapped writable.
1581 */
1582 if ( PteDst.n.u1Write
1583 && PteDst.n.u1Present
1584 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1585 {
1586 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1587 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1588 }
1589#endif
1590
1591#ifdef PGMPOOL_WITH_USER_TRACKING
1592 /*
1593 * Keep user track up to date.
1594 */
1595 if (PteDst.n.u1Present)
1596 {
1597 if (!pPteDst->n.u1Present)
1598 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1599 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1600 {
1601 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1602 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1603 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1604 }
1605 }
1606 else if (pPteDst->n.u1Present)
1607 {
1608 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1609 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1610 }
1611#endif /* PGMPOOL_WITH_USER_TRACKING */
1612
1613 /*
1614 * Update statistics and commit the entry.
1615 */
1616#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1617 if (!PteSrc.n.u1Global)
1618 pShwPage->fSeenNonGlobal = true;
1619#endif
1620 *pPteDst = PteDst;
1621 }
1622 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1623 /** @todo count these. */
1624 }
1625 else
1626 {
1627 /*
1628 * Page not-present.
1629 */
1630 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1631#ifdef PGMPOOL_WITH_USER_TRACKING
1632 /* Keep user track up to date. */
1633 if (pPteDst->n.u1Present)
1634 {
1635 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1636 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1637 }
1638#endif /* PGMPOOL_WITH_USER_TRACKING */
1639 pPteDst->u = 0;
1640 /** @todo count these. */
1641 }
1642}
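/*
 * Illustrative sketch (not part of the original source): the guest A/D bit emulation decision
 * that SyncPageWorker applies above, reduced to standard C. A page whose guest PTE/PDE lacks
 * the accessed bit is shadowed as not-present (the next access faults and lets us set A), and
 * a writable page whose guest PTE lacks the dirty bit is shadowed read-only and tagged (the
 * first write faults and lets us set D). The SHADOW_* flags and the helper are hypothetical
 * stand-ins for the real shadow PTE bits and PGM_PTFLAGS_TRACK_DIRTY.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

# define SHADOW_PRESENT      0x01u
# define SHADOW_WRITABLE     0x02u
# define SHADOW_TRACK_DIRTY  0x04u  /* software bit: write-protected only to catch the D bit */

static uint32_t shadowFlagsForGuestPte(bool fAccessed, bool fDirty, bool fWritable)
{
    if (!fAccessed)
        return 0;                                   /* not present -> any access faults, set A */
    uint32_t fFlags = SHADOW_PRESENT;
    if (fWritable)
    {
        if (!fDirty)
            fFlags |= SHADOW_TRACK_DIRTY;           /* read-only until the first write, then set D */
        else
            fFlags |= SHADOW_WRITABLE;
    }
    return fFlags;
}
#endif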
1643
1644
1645/**
1646 * Syncs a guest OS page.
1647 *
1648 * There are no conflicts at this point, nor is there any need for
1649 * page table allocations.
1650 *
1651 * @returns VBox status code.
1652 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1653 * @param pVM VM handle.
1654 * @param PdeSrc Page directory entry of the guest.
1655 * @param GCPtrPage Guest context page address.
1656 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1657 * @param uErr Fault error (X86_TRAP_PF_*).
1658 */
1659PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1660{
1661 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1662
1663#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1664 || PGM_GST_TYPE == PGM_TYPE_PAE \
1665 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1666 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1667 && PGM_SHW_TYPE != PGM_TYPE_EPT
1668
1669# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1670 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1671# endif
1672
1673 /*
1674 * Assert preconditions.
1675 */
1676 Assert(PdeSrc.n.u1Present);
1677 Assert(cPages);
1678 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1679
1680 /*
1681 * Get the shadow PDE, find the shadow page table in the pool.
1682 */
1683# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1684 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1685 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
1686
1687# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1688 /* Fetch the pgm pool shadow descriptor. */
1689 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
1690 Assert(pShwPde);
1691# endif
1692
1693# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1694
1695# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1696 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1697 PPGMPOOLPAGE pShwPde;
1698 PX86PDPAE pPDDst;
1699
1700 /* Fetch the pgm pool shadow descriptor. */
1701 int rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
1702 AssertRCSuccessReturn(rc, rc);
1703 Assert(pShwPde);
1704
1705 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1706 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1707# else
1708 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm! */;
1709 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1710 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s); NOREF(pPdptDst);
1711 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
1712 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR);
1713# endif
1714# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1715 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1716 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1717 PX86PDPAE pPDDst;
1718 PX86PDPT pPdptDst;
1719
1720 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1721 AssertRCSuccessReturn(rc, rc);
1722 Assert(pPDDst && pPdptDst);
1723 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1724# endif
1725
1726 SHWPDE PdeDst = *pPdeDst;
1727 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1728 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1729
1730# if PGM_GST_TYPE == PGM_TYPE_AMD64
1731 /* Fetch the pgm pool shadow descriptor. */
1732 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1733 Assert(pShwPde);
1734# endif
1735
1736 /*
1737 * Check that the page is present and that the shadow PDE isn't out of sync.
1738 */
1739# if PGM_GST_TYPE == PGM_TYPE_AMD64
1740 const bool fBigPage = PdeSrc.b.u1Size;
1741# else
1742 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1743# endif
1744 RTGCPHYS GCPhys;
1745 if (!fBigPage)
1746 {
1747 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1748# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1749 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1750 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1751# endif
1752 }
1753 else
1754 {
1755 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1756# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1757 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1758 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1759# endif
1760 }
1761 if ( pShwPage->GCPhys == GCPhys
1762 && PdeSrc.n.u1Present
1763 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1764 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1765# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1766 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1767# endif
1768 )
1769 {
1770 /*
1771 * Check that the PDE is marked accessed already.
1772 * Since we set the accessed bit *before* getting here on a #PF, this
1773 * check is only meant for dealing with non-#PF'ing paths.
1774 */
1775 if (PdeSrc.n.u1Accessed)
1776 {
1777 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1778 if (!fBigPage)
1779 {
1780 /*
1781 * 4KB Page - Map the guest page table.
1782 */
1783 PGSTPT pPTSrc;
1784 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1785 if (RT_SUCCESS(rc))
1786 {
1787# ifdef PGM_SYNC_N_PAGES
1788 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1789 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1790 {
1791 /*
1792 * This code path is currently only taken when the caller is PGMTrap0eHandler
1793 * for non-present pages!
1794 *
1795 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1796 * deal with locality.
1797 */
1798 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1799# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1800 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1801 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1802# else
1803 const unsigned offPTSrc = 0;
1804# endif
1805 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1806 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1807 iPTDst = 0;
1808 else
1809 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1810 for (; iPTDst < iPTDstEnd; iPTDst++)
1811 {
1812 if (!pPTDst->a[iPTDst].n.u1Present)
1813 {
1814 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1815 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1816 NOREF(GCPtrCurPage);
1817#ifndef IN_RING0
1818 /*
1819 * Assuming kernel code will be marked as supervisor - and not as user level
1820 * and executed using a conforming code selector - and marked as readonly.
1821 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1822 */
1823 PPGMPAGE pPage;
1824 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1825 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1826 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1827 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1828 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1829 )
1830#endif /* else: CSAM not active */
1831 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1832 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1833 GCPtrCurPage, PteSrc.n.u1Present,
1834 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1835 PteSrc.n.u1User & PdeSrc.n.u1User,
1836 (uint64_t)PteSrc.u,
1837 (uint64_t)pPTDst->a[iPTDst].u,
1838 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1839 }
1840 }
1841 }
1842 else
1843# endif /* PGM_SYNC_N_PAGES */
1844 {
1845 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1846 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1847 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1848 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1849 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1850 GCPtrPage, PteSrc.n.u1Present,
1851 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1852 PteSrc.n.u1User & PdeSrc.n.u1User,
1853 (uint64_t)PteSrc.u,
1854 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1855 }
1856 }
1857 else /* MMIO or invalid page: emulated in #PF handler. */
1858 {
1859 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1860 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1861 }
1862 }
1863 else
1864 {
1865 /*
1866 * 4/2MB page - lazy syncing shadow 4K pages.
1867 * (There are many causes of getting here, it's no longer only CSAM.)
1868 */
1869 /* Calculate the GC physical address of this 4KB shadow page. */
1870 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1871 /* Find ram range. */
1872 PPGMPAGE pPage;
1873 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1874 if (RT_SUCCESS(rc))
1875 {
1876# ifdef VBOX_WITH_NEW_PHYS_CODE
1877# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1878 /* Try make the page writable if necessary. */
1879 if ( PdeSrc.n.u1Write
1880 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1881 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1882 {
1883 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1884 AssertRC(rc);
1885 }
1886# endif
1887# endif
1888
1889 /*
1890 * Make shadow PTE entry.
1891 */
1892 SHWPTE PteDst;
1893 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1894 | PGM_PAGE_GET_HCPHYS(pPage);
1895 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1896 {
1897 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1898 PteDst.n.u1Write = 0;
1899 else
1900 PteDst.u = 0;
1901 }
1902 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1903# ifdef PGMPOOL_WITH_USER_TRACKING
1904 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1905 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1906# endif
1907# ifdef VBOX_WITH_NEW_PHYS_CODE
1908 /* Make sure only allocated pages are mapped writable. */
1909 if ( PteDst.n.u1Write
1910 && PteDst.n.u1Present
1911 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1912 {
1913 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1914 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1915 }
1916# endif
1917
1918 pPTDst->a[iPTDst] = PteDst;
1919
1920
1921 /*
1922 * If the page is not flagged as dirty and is writable, then make it read-only
1923 * at PD level, so we can set the dirty bit when the page is modified.
1924 *
1925 * ASSUMES that page access handlers are implemented on page table entry level.
1926 * Thus we will first catch the dirty access and set PDE.D and restart. If
1927 * there is an access handler, we'll trap again and let it work on the problem.
1928 */
1929 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1930 * As for invlpg, it simply frees the whole shadow PT.
1931 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1932 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1933 {
1934 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1935 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1936 PdeDst.n.u1Write = 0;
1937 }
1938 else
1939 {
1940 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1941 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1942 }
1943 *pPdeDst = PdeDst;
1944 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1945 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1946 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1947 }
1948 else
1949 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1950 }
1951 return VINF_SUCCESS;
1952 }
1953 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1954 }
1955 else
1956 {
1957 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1958 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1959 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1960 }
1961
1962 /*
1963 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1964 * Yea, I'm lazy.
1965 */
1966 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1967# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1968 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1969# else
1970 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPDDst);
1971# endif
1972
1973 pPdeDst->u = 0;
1974 PGM_INVL_GUEST_TLBS();
1975 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1976
1977#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1978 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1979 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
1980
1981# ifdef PGM_SYNC_N_PAGES
1982 /*
1983 * Get the shadow PDE, find the shadow page table in the pool.
1984 */
1985# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1986 X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
1987
1988# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1989 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
1990
1991# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1992 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1993 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1994 PX86PDPAE pPDDst;
1995 X86PDEPAE PdeDst;
1996 PX86PDPT pPdptDst;
1997
1998 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1999 AssertRCSuccessReturn(rc, rc);
2000 Assert(pPDDst && pPdptDst);
2001 PdeDst = pPDDst->a[iPDDst];
2002# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2003 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2004 PEPTPD pPDDst;
2005 EPTPDE PdeDst;
2006
2007 int rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, NULL, &pPDDst);
2008 if (rc != VINF_SUCCESS)
2009 {
2010 AssertRC(rc);
2011 return rc;
2012 }
2013 Assert(pPDDst);
2014 PdeDst = pPDDst->a[iPDDst];
2015# endif
2016 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
2017 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
2018 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2019
2020 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2021 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
2022 {
2023 /*
2024 * This code path is currently only taken when the caller is PGMTrap0eHandler
2025 * for non-present pages!
2026 *
2027 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2028 * deal with locality.
2029 */
2030 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2031 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2032 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2033 iPTDst = 0;
2034 else
2035 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2036 for (; iPTDst < iPTDstEnd; iPTDst++)
2037 {
2038 if (!pPTDst->a[iPTDst].n.u1Present)
2039 {
2040 GSTPTE PteSrc;
2041
2042 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2043
2044 /* Fake the page table entry */
2045 PteSrc.u = GCPtrCurPage;
2046 PteSrc.n.u1Present = 1;
2047 PteSrc.n.u1Dirty = 1;
2048 PteSrc.n.u1Accessed = 1;
2049 PteSrc.n.u1Write = 1;
2050 PteSrc.n.u1User = 1;
2051
2052 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2053
2054 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2055 GCPtrCurPage, PteSrc.n.u1Present,
2056 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2057 PteSrc.n.u1User & PdeSrc.n.u1User,
2058 (uint64_t)PteSrc.u,
2059 (uint64_t)pPTDst->a[iPTDst].u,
2060 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2061 }
2062 else
2063 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2064 }
2065 }
2066 else
2067# endif /* PGM_SYNC_N_PAGES */
2068 {
2069 GSTPTE PteSrc;
2070 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2071 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2072
2073 /* Fake the page table entry */
2074 PteSrc.u = GCPtrCurPage;
2075 PteSrc.n.u1Present = 1;
2076 PteSrc.n.u1Dirty = 1;
2077 PteSrc.n.u1Accessed = 1;
2078 PteSrc.n.u1Write = 1;
2079 PteSrc.n.u1User = 1;
2080 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2081
2082 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2083 GCPtrPage, PteSrc.n.u1Present,
2084 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2085 PteSrc.n.u1User & PdeSrc.n.u1User,
2086 (uint64_t)PteSrc.u,
2087 (uint64_t)pPTDst->a[iPTDst].u,
2088 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2089 }
2090 return VINF_SUCCESS;
2091
2092#else
2093 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2094 return VERR_INTERNAL_ERROR;
2095#endif
2096}
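/*
 * Illustrative sketch (not part of the original source): the PGM_SYNC_NR_PAGES windowing used
 * in SyncPage above, i.e. how the range of shadow PT entries around the faulting page is
 * chosen. cPtEntries stands for RT_ELEMENTS(pPTDst->a) and cSyncPages for PGM_SYNC_NR_PAGES;
 * the helper itself is hypothetical.
 */
#if 0
static void syncWindowAroundFault(unsigned iPTFault, unsigned cPtEntries, unsigned cSyncPages,
                                  unsigned *piFirst, unsigned *piEnd)
{
    unsigned iFirst = iPTFault;
    unsigned iEnd   = iPTFault + cSyncPages / 2;
    if (iEnd > cPtEntries)
        iEnd = cPtEntries;                 /* don't run past the page table */
    if (iFirst < cSyncPages / 2)
        iFirst = 0;                        /* near the start: begin at entry 0 */
    else
        iFirst -= cSyncPages / 2;          /* otherwise centre the window on the fault */
    *piFirst = iFirst;
    *piEnd   = iEnd;
}
#endif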
2097
2098
2099#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2100/**
2101 * Investigate page fault and handle write protection page faults caused by
2102 * dirty bit tracking.
2103 *
2104 * @returns VBox status code.
2105 * @param pVM VM handle.
2106 * @param uErr Page fault error code.
2107 * @param pPdeDst Shadow page directory entry.
2108 * @param pPdeSrc Guest page directory entry.
2109 * @param GCPtrPage Guest context page address.
2110 */
2111PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2112{
2113 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
2114 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2115 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2116# if PGM_GST_TYPE == PGM_TYPE_AMD64
2117 bool fBigPagesSupported = true;
2118# else
2119 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2120# endif
2121# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2122 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
2123# endif
2124 unsigned uPageFaultLevel;
2125 int rc;
2126
2127 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2128 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2129
2130# if PGM_GST_TYPE == PGM_TYPE_PAE \
2131 || PGM_GST_TYPE == PGM_TYPE_AMD64
2132
2133# if PGM_GST_TYPE == PGM_TYPE_AMD64
2134 PX86PML4E pPml4eSrc;
2135 PX86PDPE pPdpeSrc;
2136
2137 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
2138 Assert(pPml4eSrc);
2139
2140 /*
2141 * Real page fault? (PML4E level)
2142 */
2143 if ( (uErr & X86_TRAP_PF_RSVD)
2144 || !pPml4eSrc->n.u1Present
2145 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2146 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2147 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2148 )
2149 {
2150 uPageFaultLevel = 0;
2151 goto l_UpperLevelPageFault;
2152 }
2153 Assert(pPdpeSrc);
2154
2155# else /* PAE */
2156 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVM->pgm.s, GCPtrPage);
2157# endif /* PAE */
2158
2159 /*
2160 * Real page fault? (PDPE level)
2161 */
2162 if ( (uErr & X86_TRAP_PF_RSVD)
2163 || !pPdpeSrc->n.u1Present
2164# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2165 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2166 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2167 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2168# endif
2169 )
2170 {
2171 uPageFaultLevel = 1;
2172 goto l_UpperLevelPageFault;
2173 }
2174# endif
2175
2176 /*
2177 * Real page fault? (PDE level)
2178 */
2179 if ( (uErr & X86_TRAP_PF_RSVD)
2180 || !pPdeSrc->n.u1Present
2181# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2182 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2183# endif
2184 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2185 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2186 {
2187 uPageFaultLevel = 2;
2188 goto l_UpperLevelPageFault;
2189 }
2190
2191 /*
2192 * First check the easy case where the page directory has been marked read-only to track
2193 * the dirty bit of an emulated BIG page.
2194 */
2195 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2196 {
2197 /* Mark guest page directory as accessed */
2198# if PGM_GST_TYPE == PGM_TYPE_AMD64
2199 pPml4eSrc->n.u1Accessed = 1;
2200 pPdpeSrc->lm.u1Accessed = 1;
2201# endif
2202 pPdeSrc->b.u1Accessed = 1;
2203
2204 /*
2205 * Only write protection page faults are relevant here.
2206 */
2207 if (fWriteFault)
2208 {
2209 /* Mark guest page directory as dirty (BIG page only). */
2210 pPdeSrc->b.u1Dirty = 1;
2211
2212 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2213 {
2214 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2215
2216 Assert(pPdeSrc->b.u1Write);
2217
2218 pPdeDst->n.u1Write = 1;
2219 pPdeDst->n.u1Accessed = 1;
2220 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2221 PGM_INVL_BIG_PG(GCPtrPage);
2222 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2223 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2224 }
2225 }
2226 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2227 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2228 }
2229 /* else: 4KB page table */
2230
2231 /*
2232 * Map the guest page table.
2233 */
2234 PGSTPT pPTSrc;
2235 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2236 if (RT_SUCCESS(rc))
2237 {
2238 /*
2239 * Real page fault?
2240 */
2241 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2242 const GSTPTE PteSrc = *pPteSrc;
2243 if ( !PteSrc.n.u1Present
2244# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2245 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2246# endif
2247 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2248 || (fUserLevelFault && !PteSrc.n.u1User)
2249 )
2250 {
2251 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2252 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2253 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2254
2255 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2256 * See the 2nd case above as well.
2257 */
2258 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2259 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2260
2261 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2262 return VINF_EM_RAW_GUEST_TRAP;
2263 }
2264 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2265
2266 /*
2267 * Set the accessed bits in the page directory and the page table.
2268 */
2269# if PGM_GST_TYPE == PGM_TYPE_AMD64
2270 pPml4eSrc->n.u1Accessed = 1;
2271 pPdpeSrc->lm.u1Accessed = 1;
2272# endif
2273 pPdeSrc->n.u1Accessed = 1;
2274 pPteSrc->n.u1Accessed = 1;
2275
2276 /*
2277 * Only write protection page faults are relevant here.
2278 */
2279 if (fWriteFault)
2280 {
2281 /* Write access, so mark guest entry as dirty. */
2282# ifdef VBOX_WITH_STATISTICS
2283 if (!pPteSrc->n.u1Dirty)
2284 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2285 else
2286 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2287# endif
2288
2289 pPteSrc->n.u1Dirty = 1;
2290
2291 if (pPdeDst->n.u1Present)
2292 {
2293#ifndef IN_RING0
2294 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2295 * Our individual shadow handlers will provide more information and force a fatal exit.
2296 */
2297 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2298 {
2299 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2300 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2301 return VINF_SUCCESS;
2302 }
2303#endif
2304 /*
2305 * Map shadow page table.
2306 */
2307 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2308 if (pShwPage)
2309 {
2310 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2311 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2312 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2313 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2314 {
2315 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2316# ifdef VBOX_STRICT
2317 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2318 if (pPage)
2319 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2320 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2321# endif
2322 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2323
2324 Assert(pPteSrc->n.u1Write);
2325
2326 pPteDst->n.u1Write = 1;
2327 pPteDst->n.u1Dirty = 1;
2328 pPteDst->n.u1Accessed = 1;
2329 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2330 PGM_INVL_PG(GCPtrPage);
2331
2332 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2333 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2334 }
2335 }
2336 else
2337 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2338 }
2339 }
2340/** @todo Optimize accessed bit emulation? */
2341# ifdef VBOX_STRICT
2342 /*
2343 * Sanity check.
2344 */
2345 else if ( !pPteSrc->n.u1Dirty
2346 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2347 && pPdeDst->n.u1Present)
2348 {
2349 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2350 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2351 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2352 if ( pPteDst->n.u1Present
2353 && pPteDst->n.u1Write)
2354 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2355 }
2356# endif /* VBOX_STRICT */
2357 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2358 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2359 }
2360 AssertRC(rc);
2361 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2362 return rc;
2363
2364
2365l_UpperLevelPageFault:
2366 /*
2367 * Pagefault detected while checking the PML4E, PDPE or PDE.
2368 * Single exit handler to get rid of duplicate code paths.
2369 */
2370 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2371 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2372 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2373
2374 if (
2375# if PGM_GST_TYPE == PGM_TYPE_AMD64
2376 pPml4eSrc->n.u1Present &&
2377# endif
2378# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2379 pPdpeSrc->n.u1Present &&
2380# endif
2381 pPdeSrc->n.u1Present)
2382 {
2383 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2384 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2385 {
2386 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2387 }
2388 else
2389 {
2390 /*
2391 * Map the guest page table.
2392 */
2393 PGSTPT pPTSrc;
2394 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2395 if (RT_SUCCESS(rc))
2396 {
2397 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2398 const GSTPTE PteSrc = *pPteSrc;
2399 if (pPteSrc->n.u1Present)
2400 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2401 }
2402 AssertRC(rc);
2403 }
2404 }
2405 return VINF_EM_RAW_GUEST_TRAP;
2406}
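/*
 * Illustrative sketch (not part of the original source): the per-level "is this a genuine
 * guest fault?" test that CheckPageFault applies above to the PML4E/PDPE/PDE/PTE. It only
 * restates the conditions already coded there, with hypothetical parameter names; the error
 * code bits follow the standard X86_TRAP_PF_* layout.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

static bool isRealGuestFault(uint32_t uErr,            /* X86_TRAP_PF_* bits from the CPU */
                             bool fEntryPresent, bool fEntryWrite, bool fEntryUser, bool fEntryNX,
                             bool fNXValid,            /* EFER.NXE set, so NX checking applies */
                             bool fWriteProtect)       /* CR0.WP */
{
    bool fWriteFault     = (uErr & 0x02) != 0;         /* X86_TRAP_PF_RW */
    bool fUserLevelFault = (uErr & 0x04) != 0;         /* X86_TRAP_PF_US */
    bool fRsvdFault      = (uErr & 0x08) != 0;         /* X86_TRAP_PF_RSVD */
    bool fInstrFetch     = (uErr & 0x10) != 0;         /* X86_TRAP_PF_ID */

    return fRsvdFault
        || !fEntryPresent
        || (fNXValid && fInstrFetch && fEntryNX)
        || (fWriteFault && !fEntryWrite && (fUserLevelFault || fWriteProtect))
        || (fUserLevelFault && !fEntryUser);
}
#endif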
2407#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2408
2409
2410/**
2411 * Sync a shadow page table.
2412 *
2413 * The shadow page table is not present. This includes the case where
2414 * there is a conflict with a mapping.
2415 *
2416 * @returns VBox status code.
2417 * @param pVM VM handle.
2418 * @param iPDSrc Page directory index.
2419 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2420 * Assume this is a temporary mapping.
2421 * @param GCPtrPage GC Pointer of the page that caused the fault
2422 */
2423PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2424{
2425 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2426 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPtPD[iPDSrc]);
2427 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2428
2429#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2430 || PGM_GST_TYPE == PGM_TYPE_PAE \
2431 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2432 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2433 && PGM_SHW_TYPE != PGM_TYPE_EPT
2434
2435 int rc = VINF_SUCCESS;
2436
2437 /*
2438 * Validate input a little bit.
2439 */
2440 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2441# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2442 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2443 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2444
2445# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2446 /* Fetch the pgm pool shadow descriptor. */
2447 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2448 Assert(pShwPde);
2449# endif
2450
2451# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2452# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2453 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2454 PPGMPOOLPAGE pShwPde;
2455 PX86PDPAE pPDDst;
2456 PSHWPDE pPdeDst;
2457
2458 /* Fetch the pgm pool shadow descriptor. */
2459 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2460 AssertRCSuccessReturn(rc, rc);
2461 Assert(pShwPde);
2462
2463 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2464 pPdeDst = &pPDDst->a[iPDDst];
2465# else
2466 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm! */;
2467 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT); NOREF(iPdpt);
2468 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s); NOREF(pPdptDst);
2469 PSHWPDE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
2470# endif
2471# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2472 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2473 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2474 PX86PDPAE pPDDst;
2475 PX86PDPT pPdptDst;
2476 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2477 AssertRCSuccessReturn(rc, rc);
2478 Assert(pPDDst);
2479 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2480# endif
2481 SHWPDE PdeDst = *pPdeDst;
2482
2483# if PGM_GST_TYPE == PGM_TYPE_AMD64
2484 /* Fetch the pgm pool shadow descriptor. */
2485 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2486 Assert(pShwPde);
2487# endif
2488
2489# ifndef PGM_WITHOUT_MAPPINGS
2490 /*
2491 * Check for conflicts.
2492 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2493 * HC: Simply resolve the conflict.
2494 */
2495 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2496 {
2497 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2498# ifndef IN_RING3
2499 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2500 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2501 return VERR_ADDRESS_CONFLICT;
2502# else
2503 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2504 Assert(pMapping);
2505# if PGM_GST_TYPE == PGM_TYPE_32BIT
2506 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2507# elif PGM_GST_TYPE == PGM_TYPE_PAE
2508 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2509# else
2510 AssertFailed(); /* can't happen for amd64 */
2511# endif
2512 if (RT_FAILURE(rc))
2513 {
2514 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2515 return rc;
2516 }
2517 PdeDst = *pPdeDst;
2518# endif
2519 }
2520# else /* PGM_WITHOUT_MAPPINGS */
2521 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2522# endif /* PGM_WITHOUT_MAPPINGS */
2523 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2524
2525# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2526 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2527 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2528# endif
2529
2530 /*
2531 * Sync page directory entry.
2532 */
2533 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2534 if (PdeSrc.n.u1Present)
2535 {
2536 /*
2537 * Allocate & map the page table.
2538 */
2539 PSHWPT pPTDst;
2540# if PGM_GST_TYPE == PGM_TYPE_AMD64
2541 const bool fPageTable = !PdeSrc.b.u1Size;
2542# else
2543 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2544# endif
2545 PPGMPOOLPAGE pShwPage;
2546 RTGCPHYS GCPhys;
2547 if (fPageTable)
2548 {
2549 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2550# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2551 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2552 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2553# endif
2554# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2555 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2556# else
2557 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2558# endif
2559 }
2560 else
2561 {
2562 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2563# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2564 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2565 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2566# endif
2567# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2568 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2569# else
2570 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2571# endif
2572 }
2573 if (rc == VINF_SUCCESS)
2574 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2575 else if (rc == VINF_PGM_CACHED_PAGE)
2576 {
2577 /*
2578 * The PT was cached, just hook it up.
2579 */
2580 if (fPageTable)
2581 PdeDst.u = pShwPage->Core.Key
2582 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2583 else
2584 {
2585 PdeDst.u = pShwPage->Core.Key
2586 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2587 /* (see explanation and assumptions further down.) */
2588 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2589 {
2590 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2591 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2592 PdeDst.b.u1Write = 0;
2593 }
2594 }
2595 *pPdeDst = PdeDst;
2596# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2597 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2598# endif
2599 return VINF_SUCCESS;
2600 }
2601 else if (rc == VERR_PGM_POOL_FLUSHED)
2602 {
2603 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
2604# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2605 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2606# endif
2607 return VINF_PGM_SYNC_CR3;
2608 }
2609 else
2610 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2611 PdeDst.u &= X86_PDE_AVL_MASK;
2612 PdeDst.u |= pShwPage->Core.Key;
2613
2614 /*
2615 * Page directory has been accessed (this is a fault situation, remember).
2616 */
2617 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2618 if (fPageTable)
2619 {
2620 /*
2621 * Page table - 4KB.
2622 *
2623 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2624 */
2625 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2626 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2627 PGSTPT pPTSrc;
2628 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2629 if (RT_SUCCESS(rc))
2630 {
2631 /*
2632 * Start by syncing the page directory entry so CSAM's TLB trick works.
2633 */
2634 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2635 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2636 *pPdeDst = PdeDst;
2637# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2638 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2639# endif
2640
2641 /*
2642 * Directory/page user or supervisor privilege: (same goes for read/write)
2643 *
2644 * Directory Page Combined
2645 * U/S U/S U/S
2646 * 0 0 0
2647 * 0 1 0
2648 * 1 0 0
2649 * 1 1 1
2650 *
2651 * Simple AND operation. Table listed for completeness.
2652 *
2653 */
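            /* Illustrative note (not in the original source): this AND is why the Log2
             * statements below report the combined rights as, e.g.,
             *     PteSrc.n.u1Write & PdeSrc.n.u1Write
             *     PteSrc.n.u1User  & PdeSrc.n.u1User
             */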
2654 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2655# ifdef PGM_SYNC_N_PAGES
2656 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2657 unsigned iPTDst = iPTBase;
2658 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2659 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2660 iPTDst = 0;
2661 else
2662 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2663# else /* !PGM_SYNC_N_PAGES */
2664 unsigned iPTDst = 0;
2665 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2666# endif /* !PGM_SYNC_N_PAGES */
2667# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2668 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2669 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2670# else
2671 const unsigned offPTSrc = 0;
2672# endif
2673 for (; iPTDst < iPTDstEnd; iPTDst++)
2674 {
2675 const unsigned iPTSrc = iPTDst + offPTSrc;
2676 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2677
2678 if (PteSrc.n.u1Present) /* we've already cleared it above */
2679 {
2680# ifndef IN_RING0
2681 /*
2682 * Assuming kernel code will be marked as supervisor - and not as user level
2683 * and executed using a conforming code selector - and marked as readonly.
2684 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2685 */
2686 PPGMPAGE pPage;
2687 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2688 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2689 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2690 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2691 )
2692# endif
2693 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2694 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2695 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2696 PteSrc.n.u1Present,
2697 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2698 PteSrc.n.u1User & PdeSrc.n.u1User,
2699 (uint64_t)PteSrc.u,
2700 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2701 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2702 }
2703 } /* for PTEs */
2704 }
2705 }
2706 else
2707 {
2708 /*
2709 * Big page - 2/4MB.
2710 *
2711 * We'll walk the ram range list in parallel and optimize lookups.
2712 * We will only sync one shadow page table at a time.
2713 */
2714 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2715
2716 /**
2717 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2718 */
2719
2720 /*
2721 * Start by syncing the page directory entry.
2722 */
2723 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2724 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2725
2726 /*
2727 * If the page is not flagged as dirty and is writable, then make it read-only
2728 * at PD level, so we can set the dirty bit when the page is modified.
2729 *
2730 * ASSUMES that page access handlers are implemented on page table entry level.
2731 * Thus we will first catch the dirty access and set PDE.D and restart. If
2732 * there is an access handler, we'll trap again and let it work on the problem.
2733 */
2734 /** @todo move the above stuff to a section in the PGM documentation. */
2735 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2736 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2737 {
2738 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2739 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2740 PdeDst.b.u1Write = 0;
2741 }
2742 *pPdeDst = PdeDst;
2743# if defined(IN_RC) && defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2744 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2745# endif
2746
2747 /*
2748 * Fill the shadow page table.
2749 */
2750 /* Get address and flags from the source PDE. */
2751 SHWPTE PteDstBase;
2752 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2753
2754 /* Loop thru the entries in the shadow PT. */
2755 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2756 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2757 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2758 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2759 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2760 unsigned iPTDst = 0;
2761 while (iPTDst < RT_ELEMENTS(pPTDst->a))
2762 {
2763 /* Advance ram range list. */
2764 while (pRam && GCPhys > pRam->GCPhysLast)
2765 pRam = pRam->CTX_SUFF(pNext);
2766 if (pRam && GCPhys >= pRam->GCPhys)
2767 {
2768 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2769 do
2770 {
2771 /* Make shadow PTE. */
2772 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2773 SHWPTE PteDst;
2774
2775# ifdef VBOX_WITH_NEW_PHYS_CODE
2776# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2777 /* Try make the page writable if necessary. */
2778 if ( PteDstBase.n.u1Write
2779 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2780 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2781 {
2782 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2783 AssertRCReturn(rc, rc);
2784 }
2785# endif
2786# else /* !VBOX_WITH_NEW_PHYS_CODE */
2787 /* Make sure the RAM has already been allocated. */
2788 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2789 {
2790 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2791 {
2792# ifdef IN_RING3
2793 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2794# else
2795 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2796# endif
2797 if (rc != VINF_SUCCESS)
2798 return rc;
2799 }
2800 }
2801# endif /* !VBOX_WITH_NEW_PHYS_CODE */
2802
2803 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2804 {
2805 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2806 {
2807 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2808 PteDst.n.u1Write = 0;
2809 }
2810 else
2811 PteDst.u = 0;
2812 }
2813# ifndef IN_RING0
2814 /*
2815 * Assuming kernel code will be marked as supervisor and not as user level and executed
2816 * using a conforming code selector. Don't check for readonly, as that implies the whole
2817 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2818 */
2819 else if ( !PdeSrc.n.u1User
2820 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2821 PteDst.u = 0;
2822# endif
2823 else
2824 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2825
2826# ifdef VBOX_WITH_NEW_PHYS_CODE
2827 /* Only map writable pages writable. */
2828 if ( PteDst.n.u1Write
2829 && PteDst.n.u1Present
2830 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2831 {
2832 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2833 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2834 }
2835# endif
2836
2837# ifdef PGMPOOL_WITH_USER_TRACKING
2838 if (PteDst.n.u1Present)
2839 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2840# endif
2841 /* commit it */
2842 pPTDst->a[iPTDst] = PteDst;
2843 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2844 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2845 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2846
2847 /* advance */
2848 GCPhys += PAGE_SIZE;
2849 iHCPage++;
2850 iPTDst++;
2851 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2852 && GCPhys <= pRam->GCPhysLast);
2853 }
2854 else if (pRam)
2855 {
2856 Log(("Invalid pages at %RGp\n", GCPhys));
2857 do
2858 {
2859 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2860 GCPhys += PAGE_SIZE;
2861 iPTDst++;
2862 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2863 && GCPhys < pRam->GCPhys);
2864 }
2865 else
2866 {
2867 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2868 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2869 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2870 }
2871 } /* while more PTEs */
2872 } /* 4KB / 4MB */
2873 }
2874 else
2875 AssertRelease(!PdeDst.n.u1Present);
2876
2877 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2878 if (RT_FAILURE(rc))
2879 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2880 return rc;
2881
2882#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2883 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2884 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2885
2886
2887 /*
2888 * Validate input a little bit.
2889 */
2890 int rc = VINF_SUCCESS;
2891# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2892 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2893 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2894
2895# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2896 /* Fetch the pgm pool shadow descriptor. */
2897 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2898 Assert(pShwPde);
2899# endif
2900
2901# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2902# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2903 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2904 PPGMPOOLPAGE pShwPde;
2905 PX86PDPAE pPDDst;
2906 PSHWPDE pPdeDst;
2907
2908 /* Fetch the pgm pool shadow descriptor. */
2909 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2910 AssertRCSuccessReturn(rc, rc);
2911 Assert(pShwPde);
2912
2913 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2914 pPdeDst = &pPDDst->a[iPDDst];
2915# else
2916 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm!*/;
2917 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
2918# endif
2919
2920# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2921 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2922 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2923 PX86PDPAE pPDDst;
2924 PX86PDPT pPdptDst;
2925 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2926 AssertRCSuccessReturn(rc, rc);
2927 Assert(pPDDst);
2928 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2929
2930 /* Fetch the pgm pool shadow descriptor. */
2931 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2932 Assert(pShwPde);
2933
2934# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2935 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2936 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2937 PEPTPD pPDDst;
2938 PEPTPDPT pPdptDst;
2939
2940 rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2941 if (rc != VINF_SUCCESS)
2942 {
2943 AssertRC(rc);
2944 return rc;
2945 }
2946 Assert(pPDDst);
2947 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2948
2949 /* Fetch the pgm pool shadow descriptor. */
2950 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2951 Assert(pShwPde);
2952# endif
2953 SHWPDE PdeDst = *pPdeDst;
2954
2955 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2956 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2957
2958 GSTPDE PdeSrc;
2959 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2960 PdeSrc.n.u1Present = 1;
2961 PdeSrc.n.u1Write = 1;
2962 PdeSrc.n.u1Accessed = 1;
2963 PdeSrc.n.u1User = 1;
2964
2965 /*
2966 * Allocate & map the page table.
2967 */
2968 PSHWPT pPTDst;
2969 PPGMPOOLPAGE pShwPage;
2970 RTGCPHYS GCPhys;
2971
2972 /* Virtual address = physical address */
2973 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2974# if PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_EPT || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2975 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2976# else
2977 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2978# endif
2979
2980 if ( rc == VINF_SUCCESS
2981 || rc == VINF_PGM_CACHED_PAGE)
2982 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2983 else
2984 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2985
2986 PdeDst.u &= X86_PDE_AVL_MASK;
2987 PdeDst.u |= pShwPage->Core.Key;
2988 PdeDst.n.u1Present = 1;
2989 PdeDst.n.u1Write = 1;
2990# if PGM_SHW_TYPE == PGM_TYPE_EPT
2991 PdeDst.n.u1Execute = 1;
2992# else
2993 PdeDst.n.u1User = 1;
2994 PdeDst.n.u1Accessed = 1;
2995# endif
2996 *pPdeDst = PdeDst;
2997
2998 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2999 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3000 return rc;
3001
3002#else
3003 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3004 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3005 return VERR_INTERNAL_ERROR;
3006#endif
3007}
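/*
 * Illustrative sketch (not part of the original source): how the real/protected-mode (and
 * nested paging) paths above synthesise guest entries when the guest has no page tables of
 * its own - the "guest" mapping is simply an identity mapping with full access, so a fake
 * PTE is built from the virtual address itself. Bit positions follow the standard x86 PTE
 * layout; the FAKE_PTE_* names and the helper are hypothetical.
 */
#if 0
# include <stdint.h>

# define FAKE_PTE_P   0x001u   /* present */
# define FAKE_PTE_RW  0x002u   /* writable */
# define FAKE_PTE_US  0x004u   /* user accessible */
# define FAKE_PTE_A   0x020u   /* accessed */
# define FAKE_PTE_D   0x040u   /* dirty */

static uint64_t fakeIdentityPte(uint64_t GCPtrPage)
{
    /* Virtual address == physical address; grant everything so no guest-level faults occur. */
    return (GCPtrPage & ~0xfffULL) | FAKE_PTE_P | FAKE_PTE_RW | FAKE_PTE_US | FAKE_PTE_A | FAKE_PTE_D;
}
#endif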
3008
3009
3010
3011/**
3012 * Prefetch a page/set of pages.
3013 *
3014 * Typically used to sync commonly used pages before entering raw mode
3015 * after a CR3 reload.
3016 *
3017 * @returns VBox status code.
3018 * @param pVM VM handle.
3019 * @param GCPtrPage Page to prefetch.
3020 */
3021PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage)
3022{
3023#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3024 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3025 /*
3026 * Check that all guest levels through the PDE are present, getting the
3027 * PD and PDE in the process.
3028 */
3029 int rc = VINF_SUCCESS;
3030# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3031# if PGM_GST_TYPE == PGM_TYPE_32BIT
3032 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3033 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3034# elif PGM_GST_TYPE == PGM_TYPE_PAE
3035 unsigned iPDSrc;
3036# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3037 X86PDPE PdpeSrc;
3038 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3039# else
3040 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, NULL);
3041# endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
3042 if (!pPDSrc)
3043 return VINF_SUCCESS; /* not present */
3044# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3045 unsigned iPDSrc;
3046 PX86PML4E pPml4eSrc;
3047 X86PDPE PdpeSrc;
3048 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3049 if (!pPDSrc)
3050 return VINF_SUCCESS; /* not present */
3051# endif
3052 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3053# else
3054 PGSTPD pPDSrc = NULL;
3055 const unsigned iPDSrc = 0;
3056 GSTPDE PdeSrc;
3057
3058 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3059 PdeSrc.n.u1Present = 1;
3060 PdeSrc.n.u1Write = 1;
3061 PdeSrc.n.u1Accessed = 1;
3062 PdeSrc.n.u1User = 1;
3063# endif
3064
3065 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3066 {
3067# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3068 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
3069# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3070# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3071 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3072 PX86PDPAE pPDDst;
3073 X86PDEPAE PdeDst;
3074# if PGM_GST_TYPE != PGM_TYPE_PAE
3075 X86PDPE PdpeSrc;
3076
3077 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3078 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3079# endif
3080 int rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3081 if (rc != VINF_SUCCESS)
3082 {
3083 AssertRC(rc);
3084 return rc;
3085 }
3086 Assert(pPDDst);
3087 PdeDst = pPDDst->a[iPDDst];
3088# else
3089 const X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
3090# endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
3091
3092# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3093 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3094 PX86PDPAE pPDDst;
3095 X86PDEPAE PdeDst;
3096
3097# if PGM_GST_TYPE == PGM_TYPE_PROT
3098 /* AMD-V nested paging */
3099 X86PML4E Pml4eSrc;
3100 X86PDPE PdpeSrc;
3101 PX86PML4E pPml4eSrc = &Pml4eSrc;
3102
3103 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3104 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3105 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3106# endif
3107
3108 int rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3109 if (rc != VINF_SUCCESS)
3110 {
3111 AssertRC(rc);
3112 return rc;
3113 }
3114 Assert(pPDDst);
3115 PdeDst = pPDDst->a[iPDDst];
3116# endif
3117 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3118 {
3119 if (!PdeDst.n.u1Present)
3120 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3121 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3122 else
3123 {
3124 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3125 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3126 * makes no sense to prefetch more than one page.
3127 */
3128 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3129 if (RT_SUCCESS(rc))
3130 rc = VINF_SUCCESS;
3131 }
3132 }
3133 }
3134 return rc;
3135
3136#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3137 return VINF_SUCCESS; /* ignore */
3138#endif
3139}
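/*
 * Illustrative sketch only (#if 0, never compiled): how a hypothetical
 * caller could walk a linear range in 4KB steps, which is the granularity
 * PrefetchPage works at. The helper name prefetchRange is made up for this
 * example; the assumption is that callers go through the public
 * PGMPrefetchPage() wrapper rather than this template function directly.
 */
#if 0
static int prefetchRange(PVM pVM, RTGCPTR GCPtrStart, size_t cb)
{
    if (!cb)
        return VINF_SUCCESS;
    RTGCPTR const GCPtrLast = GCPtrStart + cb - 1;
    for (RTGCPTR GCPtr = GCPtrStart & ~(RTGCPTR)PAGE_OFFSET_MASK;
         GCPtr <= GCPtrLast;
         GCPtr += PAGE_SIZE)
    {
        int rc = PGMPrefetchPage(pVM, GCPtr);
        if (RT_FAILURE(rc))
            return rc; /* stop at the first hard error */
    }
    return VINF_SUCCESS;
}
#endif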
3140
3141
3142
3143
3144/**
3145 * Syncs a page during a PGMVerifyAccess() call.
3146 *
3147 * @returns VBox status code (informational included).
3148 * @param GCPtrPage The address of the page to sync.
3149 * @param fPage The effective guest page flags.
3150 * @param uErr The trap error code.
3151 */
3152PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3153{
3154 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3155
3156 Assert(!HWACCMIsNestedPagingActive(pVM));
3157#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3158 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3159
3160# ifndef IN_RING0
3161 if (!(fPage & X86_PTE_US))
3162 {
3163 /*
3164 * Mark this page as safe.
3165 */
3166 /** @todo not correct for pages that contain both code and data!! */
3167 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3168 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3169 }
3170# endif
3171
3172 /*
3173 * Get guest PD and index.
3174 */
3175# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3176# if PGM_GST_TYPE == PGM_TYPE_32BIT
3177 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3178 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3179# elif PGM_GST_TYPE == PGM_TYPE_PAE
3180 unsigned iPDSrc;
3181# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3182 X86PDPE PdpeSrc;
3183 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3184# else
3185 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, NULL);
3186# endif
3187
3188 if (!pPDSrc)
3189 {
3190 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3191 return VINF_EM_RAW_GUEST_TRAP;
3192 }
3193# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3194 unsigned iPDSrc;
3195 PX86PML4E pPml4eSrc;
3196 X86PDPE PdpeSrc;
3197 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3198 if (!pPDSrc)
3199 {
3200 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3201 return VINF_EM_RAW_GUEST_TRAP;
3202 }
3203# endif
3204# else
3205 PGSTPD pPDSrc = NULL;
3206 const unsigned iPDSrc = 0;
3207# endif
3208 int rc = VINF_SUCCESS;
3209
3210 /*
3211 * First check if the shadow pd is present.
3212 */
3213# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3214 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
3215# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3216 PX86PDEPAE pPdeDst;
3217# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3218 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3219 PX86PDPAE pPDDst;
3220# if PGM_GST_TYPE != PGM_TYPE_PAE
3221 X86PDPE PdpeSrc;
3222
3223 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3224 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3225# endif
3226 rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3227 if (rc != VINF_SUCCESS)
3228 {
3229 AssertRC(rc);
3230 return rc;
3231 }
3232 Assert(pPDDst);
3233 pPdeDst = &pPDDst->a[iPDDst];
3234# else
3235 pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
3236# endif
3237# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3238 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3239 PX86PDPAE pPDDst;
3240 PX86PDEPAE pPdeDst;
3241
3242# if PGM_GST_TYPE == PGM_TYPE_PROT
3243 /* AMD-V nested paging */
3244 X86PML4E Pml4eSrc;
3245 X86PDPE PdpeSrc;
3246 PX86PML4E pPml4eSrc = &Pml4eSrc;
3247
3248 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3249 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3250 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3251# endif
3252
3253 rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3254 if (rc != VINF_SUCCESS)
3255 {
3256 AssertRC(rc);
3257 return rc;
3258 }
3259 Assert(pPDDst);
3260 pPdeDst = &pPDDst->a[iPDDst];
3261# endif
3262 if (!pPdeDst->n.u1Present)
3263 {
3264 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3265 AssertRC(rc);
3266 if (rc != VINF_SUCCESS)
3267 return rc;
3268 }
3269
3270# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3271 /* Check for dirty bit fault */
3272 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3273 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3274 Log(("PGMVerifyAccess: success (dirty)\n"));
3275 else
3276 {
3277 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3278#else
3279 {
3280 GSTPDE PdeSrc;
3281 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3282 PdeSrc.n.u1Present = 1;
3283 PdeSrc.n.u1Write = 1;
3284 PdeSrc.n.u1Accessed = 1;
3285 PdeSrc.n.u1User = 1;
3286
3287#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3288 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3289 if (uErr & X86_TRAP_PF_US)
3290 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3291 else /* supervisor */
3292 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3293
3294 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3295 if (RT_SUCCESS(rc))
3296 {
3297 /* Page was successfully synced */
3298 Log2(("PGMVerifyAccess: success (sync)\n"));
3299 rc = VINF_SUCCESS;
3300 }
3301 else
3302 {
3303 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3304 return VINF_EM_RAW_GUEST_TRAP;
3305 }
3306 }
3307 return rc;
3308
3309#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3310
3311 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3312 return VERR_INTERNAL_ERROR;
3313#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3314}
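/*
 * Illustrative sketch only (#if 0, never compiled): the x86 page-fault
 * error code bits carried by uErr and tested above via X86_TRAP_PF_US.
 * Plain integer constants are used here purely for demonstration.
 */
#if 0
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    uint32_t const uErr = 0x07;              /* example: P + W + U/S set */
    printf("P=%u W=%u U=%u RSVD=%u I/D=%u\n",
           (unsigned)( uErr       & 1),      /* bit 0: protection violation (1) vs not-present (0) */
           (unsigned)((uErr >> 1) & 1),      /* bit 1: write access */
           (unsigned)((uErr >> 2) & 1),      /* bit 2: user-mode access */
           (unsigned)((uErr >> 3) & 1),      /* bit 3: reserved bit set in a paging entry */
           (unsigned)((uErr >> 4) & 1));     /* bit 4: instruction fetch */
    return 0;
}
#endif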
3315
3316
3317#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3318# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3319/**
3320 * Figures out which kind of shadow page this guest PDE warrants.
3321 *
3322 * @returns Shadow page kind.
3323 * @param pPdeSrc The guest PDE in question.
3324 * @param cr4 The current guest cr4 value.
3325 */
3326DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3327{
3328# if PGM_GST_TYPE == PGM_TYPE_AMD64
3329 if (!pPdeSrc->n.u1Size)
3330# else
3331 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3332# endif
3333 return BTH_PGMPOOLKIND_PT_FOR_PT;
3334 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3335 //{
3336 // case 0:
3337 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3338 // case X86_PDE4M_RW:
3339 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3340 // case X86_PDE4M_US:
3341 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3342 // case X86_PDE4M_RW | X86_PDE4M_US:
3343 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3344# if 0
3345 // case X86_PDE4M_PAE_NX:
3346 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3347 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3348 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3349 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3350 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3351 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3352 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3353# endif
3354 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3355 //}
3356}
3357# endif
3358#endif
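/*
 * Illustrative sketch only (#if 0, never compiled): the decision
 * CalcPageKind makes above, stated as a standalone predicate. A guest PDE
 * maps a big (2/4MB) page when its PS bit is set; for non-AMD64 guests the
 * code additionally requires CR4.PSE. The function name and parameters are
 * invented for this example.
 */
#if 0
# include <stdbool.h>

static bool isBigPagePde(bool fPdePs, bool fCr4Pse, bool fAmd64Guest)
{
    return fPdePs && (fAmd64Guest || fCr4Pse);
}
#endif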
3359
3360#undef MY_STAM_COUNTER_INC
3361#define MY_STAM_COUNTER_INC(a) do { } while (0)
3362
3363
3364/**
3365 * Syncs the paging hierarchy starting at CR3.
3366 *
3367 * @returns VBox status code, no specials.
3368 * @param pVM The virtual machine.
3369 * @param cr0 Guest context CR0 register
3370 * @param cr3 Guest context CR3 register
3371 * @param cr4 Guest context CR4 register
3372 * @param fGlobal Including global page directories or not
3373 */
3374PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3375{
3376 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3377 fGlobal = true; /* Change this CR3 reload to be a global one. */
3378
3379 LogFlow(("SyncCR3 %d\n", fGlobal));
3380
3381#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3382 /*
3383 * Update page access handlers.
3384 * The virtual handlers are always flushed, while the physical ones are flushed only on demand.
3385 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3386 * have to look into that later because it will have a bad influence on performance.
3387 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3388 * bird: Yes, but that won't work for aliases.
3389 */
3390 /** @todo this MUST go away. See #1557. */
3391 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3392 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3393 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3394#endif
3395
3396#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3397 /*
3398 * Nested / EPT - almost no work.
3399 */
3400 /** @todo check if this is really necessary; the call does it as well... */
3401 HWACCMFlushTLB(pVM);
3402 return VINF_SUCCESS;
3403
3404#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3405 /*
3406 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3407 * out the shadow parts when the guest modifies its tables.
3408 */
3409 return VINF_SUCCESS;
3410
3411#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3412
3413# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3414# ifdef PGM_WITHOUT_MAPPINGS
3415 Assert(pVM->pgm.s.fMappingsFixed);
3416 return VINF_SUCCESS;
3417# else
3418 /* Nothing to do when mappings are fixed. */
3419 if (pVM->pgm.s.fMappingsFixed)
3420 return VINF_SUCCESS;
3421
3422 int rc = PGMMapResolveConflicts(pVM);
3423 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3424 if (rc == VINF_PGM_SYNC_CR3)
3425 {
3426 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3427 return VINF_PGM_SYNC_CR3;
3428 }
3429# endif
3430 return VINF_SUCCESS;
3431# else
3432 /*
3433 * PAE and 32-bit legacy mode (shadow).
3434 * (Guest PAE, 32-bit legacy, protected and real modes.)
3435 */
3436 Assert(fGlobal || (cr4 & X86_CR4_PGE));
3437 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Global) : &pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3NotGlobal));
3438
3439# if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
3440 bool const fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3441
3442 /*
3443 * Get page directory addresses.
3444 */
3445# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3446 PX86PDE pPDEDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, 0);
3447# else /* PGM_SHW_TYPE == PGM_TYPE_PAE */
3448# if PGM_GST_TYPE == PGM_TYPE_32BIT
3449 PX86PDEPAE pPDEDst = NULL;
3450# endif
3451# endif
3452
3453# if PGM_GST_TYPE == PGM_TYPE_32BIT
3454 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3455 Assert(pPDSrc);
3456# if !defined(IN_RC) && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
3457 Assert(PGMPhysGCPhys2R3PtrAssert(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == (RTR3PTR)pPDSrc);
3458# endif
3459# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3460
3461 /*
3462 * Iterate the CR3 page.
3463 */
3464 PPGMMAPPING pMapping;
3465 unsigned iPdNoMapping;
3466 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
3467 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3468
3469 /* Only check mappings if they are supposed to be put into the shadow page table. */
3470 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
3471 {
3472 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3473 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
3474 }
3475 else
3476 {
3477 pMapping = 0;
3478 iPdNoMapping = ~0U;
3479 }
3480
3481# if PGM_GST_TYPE == PGM_TYPE_PAE
3482 for (uint64_t iPdpt = 0; iPdpt < GST_PDPE_ENTRIES; iPdpt++)
3483 {
3484 unsigned iPDSrc;
3485 X86PDPE PdpeSrc;
3486 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPdpt << X86_PDPT_SHIFT, &iPDSrc, &PdpeSrc);
3487 PX86PDEPAE pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, iPdpt << X86_PDPT_SHIFT);
3488 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3489
3490 if (pPDSrc == NULL)
3491 {
3492 /* PDPE not present */
3493 if (pPdptDst->a[iPdpt].n.u1Present)
3494 {
3495 LogFlow(("SyncCR3: guest PDPE %lld not present; clear shw pdpe\n", iPdpt));
3496 /* for each page directory entry */
3497 for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++)
3498 {
3499 if ( pPDEDst[iPD].n.u1Present
3500 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
3501 {
3502 pgmPoolFree(pVM, pPDEDst[iPD].u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdpt * X86_PG_PAE_ENTRIES + iPD);
3503 pPDEDst[iPD].u = 0;
3504 }
3505 }
3506 }
3507 if (!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING))
3508 pPdptDst->a[iPdpt].n.u1Present = 0;
3509 continue;
3510 }
3511# else /* PGM_GST_TYPE != PGM_TYPE_PAE */
3512 {
3513# endif /* PGM_GST_TYPE != PGM_TYPE_PAE */
3514 for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++)
3515 {
3516# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3517 if ((iPD & 255) == 0) /* Start of new PD. */
3518 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)iPD << GST_PD_SHIFT);
3519# endif
3520# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3521 Assert(pgmShwGet32BitPDEPtr(&pVM->pgm.s, (uint32_t)iPD << SHW_PD_SHIFT) == pPDEDst);
3522# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3523# if defined(VBOX_STRICT) && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) /* Unfortunately not reliable with PGMR0DynMap and multiple VMs. */
3524 RTGCPTR GCPtrStrict = (uint32_t)iPD << GST_PD_SHIFT;
3525# if PGM_GST_TYPE == PGM_TYPE_PAE
3526 GCPtrStrict |= iPdpt << X86_PDPT_SHIFT;
3527# endif
3528 AssertMsg(pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrStrict) == pPDEDst, ("%p vs %p (%RGv)\n", pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrStrict), pPDEDst, GCPtrStrict));
3529# endif /* VBOX_STRICT */
3530# endif
3531 GSTPDE PdeSrc = pPDSrc->a[iPD];
3532 if ( PdeSrc.n.u1Present
3533 && (PdeSrc.n.u1User || fRawR0Enabled))
3534 {
3535# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3536 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3537 && !defined(PGM_WITHOUT_MAPPINGS)
3538
3539 /*
3540 * Check for conflicts with GC mappings.
3541 */
3542# if PGM_GST_TYPE == PGM_TYPE_PAE
3543 if (iPD + iPdpt * X86_PG_PAE_ENTRIES == iPdNoMapping)
3544# else
3545 if (iPD == iPdNoMapping)
3546# endif
3547 {
3548 if (pVM->pgm.s.fMappingsFixed)
3549 {
3550 /* It's fixed, just skip the mapping. */
3551 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3552 Assert(PGM_GST_TYPE == PGM_TYPE_32BIT || (iPD + cPTs - 1) / X86_PG_PAE_ENTRIES == iPD / X86_PG_PAE_ENTRIES);
3553 iPD += cPTs - 1;
3554# if PGM_SHW_TYPE != PGM_GST_TYPE /* SHW==PAE && GST==32BIT */
3555 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)(iPD + 1) << GST_PD_SHIFT);
3556# else
3557 pPDEDst += cPTs;
3558# endif
3559 pMapping = pMapping->CTX_SUFF(pNext);
3560 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3561 continue;
3562 }
3563# ifdef IN_RING3
3564# if PGM_GST_TYPE == PGM_TYPE_32BIT
3565 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3566# elif PGM_GST_TYPE == PGM_TYPE_PAE
3567 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpt << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3568# endif
3569 if (RT_FAILURE(rc))
3570 return rc;
3571
3572 /*
3573 * Update iPdNoMapping and pMapping.
3574 */
3575 pMapping = pVM->pgm.s.pMappingsR3;
3576 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3577 pMapping = pMapping->pNextR3;
3578 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3579# else /* !IN_RING3 */
3580 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3581 return VINF_PGM_SYNC_CR3;
3582# endif /* !IN_RING3 */
3583 }
3584# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3585 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3586# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3587
3588 /*
3589 * Sync page directory entry.
3590 *
3591 * The current approach is to allocate the page table but to set
3592 * the entry to not-present and postpone the page table syncing until
3593 * it's actually used.
3594 */
3595# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3596 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3597# elif PGM_GST_TYPE == PGM_TYPE_PAE
3598 const unsigned iPdShw = iPD + iPdpt * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3599# else
3600 const unsigned iPdShw = iPD; NOREF(iPdShw);
3601# endif
3602 {
3603 SHWPDE PdeDst = *pPDEDst;
3604 if (PdeDst.n.u1Present)
3605 {
3606 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
3607 RTGCPHYS GCPhys;
3608 if ( !PdeSrc.b.u1Size
3609 || !fBigPagesSupported)
3610 {
3611 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
3612# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3613 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3614 GCPhys |= i * (PAGE_SIZE / 2);
3615# endif
3616 }
3617 else
3618 {
3619 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3620# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3621 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
3622 GCPhys |= i * X86_PAGE_2M_SIZE;
3623# endif
3624 }
3625
3626 if ( pShwPage->GCPhys == GCPhys
3627 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
3628 && ( pShwPage->fCached
3629 || ( !fGlobal
3630 && ( false
3631# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
3632 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3633 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
3634 || ( !pShwPage->fSeenNonGlobal
3635 && (cr4 & X86_CR4_PGE))
3636# endif
3637 )
3638 )
3639 )
3640 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
3641 || ( fBigPagesSupported
3642 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
3643 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
3644 )
3645 )
3646 {
3647# ifdef VBOX_WITH_STATISTICS
3648 if ( !fGlobal
3649 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3650 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
3651 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstSkippedGlobalPD));
3652 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
3653 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstSkippedGlobalPT));
3654 else
3655 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstCacheHit));
3656# endif /* VBOX_WITH_STATISTICS */
3657 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
3658 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
3659 //# ifdef PGMPOOL_WITH_CACHE
3660 // pgmPoolCacheUsed(pPool, pShwPage);
3661 //# endif
3662 }
3663 else
3664 {
3665 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3666 pPDEDst->u = 0;
3667 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstFreed));
3668 }
3669 }
3670 else
3671 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstNotPresent));
3672
3673 /* advance */
3674 pPDEDst++;
3675 } /* foreach 2MB PAE PDE in 4MB guest PDE */
3676 }
3677# if PGM_GST_TYPE == PGM_TYPE_PAE
3678 else if (iPD + iPdpt * X86_PG_PAE_ENTRIES != iPdNoMapping)
3679# else
3680 else if (iPD != iPdNoMapping)
3681# endif
3682 {
3683 /*
3684 * Check if there is any page directory to mark not present here.
3685 */
3686# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3687 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3688# elif PGM_GST_TYPE == PGM_TYPE_PAE
3689 const unsigned iPdShw = iPD + iPdpt * X86_PG_PAE_ENTRIES;
3690# else
3691 const unsigned iPdShw = iPD;
3692# endif
3693 {
3694 if (pPDEDst->n.u1Present)
3695 {
3696 pgmPoolFree(pVM, pPDEDst->u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdShw);
3697 pPDEDst->u = 0;
3698 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstFreedSrcNP));
3699 }
3700 pPDEDst++;
3701 }
3702 }
3703 else
3704 {
3705# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3706 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3707 && !defined(PGM_WITHOUT_MAPPINGS)
3708
3709 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3710
3711 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3712 if (pVM->pgm.s.fMappingsFixed)
3713 {
3714 /* It's fixed, just skip the mapping. */
3715 pMapping = pMapping->CTX_SUFF(pNext);
3716 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3717 }
3718 else
3719 {
3720 /*
3721 * Check for conflicts for subsequent pagetables
3722 * and advance to the next mapping.
3723 */
3724 iPdNoMapping = ~0U;
3725 unsigned iPT = cPTs;
3726 while (iPT-- > 1)
3727 {
3728 if ( pPDSrc->a[iPD + iPT].n.u1Present
3729 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3730 {
3731# ifdef IN_RING3
3732# if PGM_GST_TYPE == PGM_TYPE_32BIT
3733 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3734# elif PGM_GST_TYPE == PGM_TYPE_PAE
3735 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpt << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3736# endif
3737 if (RT_FAILURE(rc))
3738 return rc;
3739
3740 /*
3741 * Update iPdNoMapping and pMapping.
3742 */
3743 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3744 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3745 pMapping = pMapping->CTX_SUFF(pNext);
3746 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3747 break;
3748# else
3749 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3750 return VINF_PGM_SYNC_CR3;
3751# endif
3752 }
3753 }
3754 if (iPdNoMapping == ~0U && pMapping)
3755 {
3756 pMapping = pMapping->CTX_SUFF(pNext);
3757 if (pMapping)
3758 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3759 }
3760 }
3761
3762 /* advance. */
3763 Assert(PGM_GST_TYPE == PGM_TYPE_32BIT || (iPD + cPTs - 1) / X86_PG_PAE_ENTRIES == iPD / X86_PG_PAE_ENTRIES);
3764 iPD += cPTs - 1;
3765# if PGM_SHW_TYPE != PGM_GST_TYPE /* SHW==PAE && GST==32BIT */
3766 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)(iPD + 1) << GST_PD_SHIFT);
3767# else
3768 pPDEDst += cPTs;
3769# endif
3770# if PGM_GST_TYPE != PGM_SHW_TYPE
3771 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3772# endif
3773# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3774 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3775# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3776 }
3777
3778 } /* for iPD */
3779 } /* for each PDPTE (PAE) */
3780 return VINF_SUCCESS;
3781
3782# else /* guest real and protected mode */
3783 return VINF_SUCCESS;
3784# endif
3785#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
3786#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3787}
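/*
 * Illustrative sketch only (#if 0, never compiled): the PAE index
 * arithmetic the SyncCR3 loop above relies on. A PAE linear address selects
 * one of 4 PDPT entries (bits 31:30) and one of 512 PDEs (bits 29:21), each
 * PDE covering 2MB. Plain x86 constants are used here; the real code goes
 * through X86_PDPT_SHIFT / GST_PD_SHIFT.
 */
#if 0
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    uint32_t const GCPtr = 0x80400000u;           /* example linear address */
    unsigned const iPdpt = GCPtr >> 30;           /* 0..3 */
    unsigned const iPD   = (GCPtr >> 21) & 0x1ff; /* 0..511 */
    printf("iPdpt=%u iPD=%u\n", iPdpt, iPD);
    return 0;
}
#endif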
3788
3789
3790
3791
3792#ifdef VBOX_STRICT
3793#ifdef IN_RC
3794# undef AssertMsgFailed
3795# define AssertMsgFailed Log
3796#endif
3797#ifdef IN_RING3
3798# include <VBox/dbgf.h>
3799
3800/**
3801 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3802 *
3803 * @returns VBox status code (VINF_SUCCESS).
3804 * @param pVM The VM handle.
3805 * @param cr3 The root of the hierarchy.
3806 * @param cr4 The cr4, only PAE and PSE are currently used.
3807 * @param fLongMode Set if long mode, false if not long mode.
3808 * @param cMaxDepth Number of levels to dump.
3809 * @param pHlp Pointer to the output functions.
3810 */
3811__BEGIN_DECLS
3812VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3813__END_DECLS
3814
3815#endif
3816
3817/**
3818 * Checks that the shadow page table is in sync with the guest one.
3819 *
3820 * @returns The number of errors.
3821 * @param pVM The virtual machine.
3822 * @param cr3 Guest context CR3 register
3823 * @param cr4 Guest context CR4 register
3824 * @param GCPtr Where to start. Defaults to 0.
3825 * @param cb How much to check. Defaults to everything.
3826 */
3827PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3828{
3829#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3830 return 0;
3831#else
3832 unsigned cErrors = 0;
3833
3834#if PGM_GST_TYPE == PGM_TYPE_PAE
3835 /** @todo currently broken; crashes below somewhere */
3836 AssertFailed();
3837#endif
3838
3839#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3840 || PGM_GST_TYPE == PGM_TYPE_PAE \
3841 || PGM_GST_TYPE == PGM_TYPE_AMD64
3842
3843# if PGM_GST_TYPE == PGM_TYPE_AMD64
3844 bool fBigPagesSupported = true;
3845# else
3846 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3847# endif
3848 PPGM pPGM = &pVM->pgm.s;
3849 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3850 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3851# ifndef IN_RING0
3852 RTHCPHYS HCPhys; /* general usage. */
3853# endif
3854 int rc;
3855
3856 /*
3857 * Check that the Guest CR3 and all its mappings are correct.
3858 */
3859 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3860 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3861 false);
3862# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3863# if PGM_GST_TYPE == PGM_TYPE_32BIT
3864 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3865# else
3866 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3867# endif
3868 AssertRCReturn(rc, 1);
3869 HCPhys = NIL_RTHCPHYS;
3870 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3871 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3872# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3873 RTGCPHYS GCPhys;
3874 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3875 AssertRCReturn(rc, 1);
3876 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3877# endif
3878# endif /* !IN_RING0 */
3879
3880 /*
3881 * Get and check the Shadow CR3.
3882 */
3883# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3884 unsigned cPDEs = X86_PG_ENTRIES;
3885 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3886# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3887# if PGM_GST_TYPE == PGM_TYPE_32BIT
3888 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3889# else
3890 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3891# endif
3892 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3893# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3894 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3895 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3896# endif
3897 if (cb != ~(RTGCPTR)0)
3898 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3899
3900/** @todo call the other two PGMAssert*() functions. */
3901
3902# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3903 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3904# endif
3905
3906# if PGM_GST_TYPE == PGM_TYPE_AMD64
3907 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3908
3909 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3910 {
3911 PPGMPOOLPAGE pShwPdpt = NULL;
3912 PX86PML4E pPml4eSrc;
3913 PX86PML4E pPml4eDst;
3914 RTGCPHYS GCPhysPdptSrc;
3915
3916 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3917 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3918
3919 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3920 if (!pPml4eDst->n.u1Present)
3921 {
3922 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3923 continue;
3924 }
3925
3926 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3927 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3928
3929 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3930 {
3931 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3932 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3933 cErrors++;
3934 continue;
3935 }
3936
3937 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3938 {
3939 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3940 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3941 cErrors++;
3942 continue;
3943 }
3944
3945 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3946 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3947 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3948 {
3949 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3950 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3951 cErrors++;
3952 continue;
3953 }
3954# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3955 {
3956# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3957
3958# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3959 /*
3960 * Check the PDPTEs too.
3961 */
3962 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3963
3964 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3965 {
3966 unsigned iPDSrc;
3967 PPGMPOOLPAGE pShwPde = NULL;
3968 PX86PDPE pPdpeDst;
3969 RTGCPHYS GCPhysPdeSrc;
3970# if PGM_GST_TYPE == PGM_TYPE_PAE
3971 X86PDPE PdpeSrc;
3972 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3973 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3974# else
3975 PX86PML4E pPml4eSrc;
3976 X86PDPE PdpeSrc;
3977 PX86PDPT pPdptDst;
3978 PX86PDPAE pPDDst;
3979 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3980
3981 rc = pgmShwGetLongModePDPtr(pVM, GCPtr, NULL, &pPdptDst, &pPDDst);
3982 if (rc != VINF_SUCCESS)
3983 {
3984 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3985 GCPtr += 512 * _2M;
3986 continue; /* next PDPTE */
3987 }
3988 Assert(pPDDst);
3989# endif
3990 Assert(iPDSrc == 0);
3991
3992 pPdpeDst = &pPdptDst->a[iPdpt];
3993
3994 if (!pPdpeDst->n.u1Present)
3995 {
3996 GCPtr += 512 * _2M;
3997 continue; /* next PDPTE */
3998 }
3999
4000 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
4001 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
4002
4003 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
4004 {
4005 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4006 GCPtr += 512 * _2M;
4007 cErrors++;
4008 continue;
4009 }
4010
4011 if (GCPhysPdeSrc != pShwPde->GCPhys)
4012 {
4013# if PGM_GST_TYPE == PGM_TYPE_AMD64
4014 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4015# else
4016 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4017# endif
4018 GCPtr += 512 * _2M;
4019 cErrors++;
4020 continue;
4021 }
4022
4023# if PGM_GST_TYPE == PGM_TYPE_AMD64
4024 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
4025 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
4026 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
4027 {
4028 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4029 GCPtr += 512 * _2M;
4030 cErrors++;
4031 continue;
4032 }
4033# endif
4034
4035# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4036 {
4037# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4038# if PGM_GST_TYPE == PGM_TYPE_32BIT
4039 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
4040# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4041 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
4042# endif
4043# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
4044 /*
4045 * Iterate the shadow page directory.
4046 */
4047 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
4048 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
4049
4050 for (;
4051 iPDDst < cPDEs;
4052 iPDDst++, GCPtr += cIncrement)
4053 {
4054# if PGM_SHW_TYPE == PGM_TYPE_PAE
4055 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
4056# else
4057 const SHWPDE PdeDst = pPDDst->a[iPDDst];
4058# endif
4059 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
4060 {
4061 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
4062 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
4063 {
4064 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
4065 cErrors++;
4066 continue;
4067 }
4068 }
4069 else if ( (PdeDst.u & X86_PDE_P)
4070 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
4071 )
4072 {
4073 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4074 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
4075 if (!pPoolPage)
4076 {
4077 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4078 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4079 cErrors++;
4080 continue;
4081 }
4082 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
4083
4084 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4085 {
4086 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4087 GCPtr, (uint64_t)PdeDst.u));
4088 cErrors++;
4089 }
4090
4091 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4092 {
4093 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4094 GCPtr, (uint64_t)PdeDst.u));
4095 cErrors++;
4096 }
4097
4098 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4099 if (!PdeSrc.n.u1Present)
4100 {
4101 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4102 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4103 cErrors++;
4104 continue;
4105 }
4106
4107 if ( !PdeSrc.b.u1Size
4108 || !fBigPagesSupported)
4109 {
4110 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4111# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4112 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4113# endif
4114 }
4115 else
4116 {
4117# if PGM_GST_TYPE == PGM_TYPE_32BIT
4118 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4119 {
4120 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4121 GCPtr, (uint64_t)PdeSrc.u));
4122 cErrors++;
4123 continue;
4124 }
4125# endif
4126 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
4127# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4128 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4129# endif
4130 }
4131
4132 if ( pPoolPage->enmKind
4133 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4134 {
4135 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4136 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4137 cErrors++;
4138 }
4139
4140 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4141 if (!pPhysPage)
4142 {
4143 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4144 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4145 cErrors++;
4146 continue;
4147 }
4148
4149 if (GCPhysGst != pPoolPage->GCPhys)
4150 {
4151 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4152 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4153 cErrors++;
4154 continue;
4155 }
4156
4157 if ( !PdeSrc.b.u1Size
4158 || !fBigPagesSupported)
4159 {
4160 /*
4161 * Page Table.
4162 */
4163 const GSTPT *pPTSrc;
4164 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4165 if (RT_FAILURE(rc))
4166 {
4167 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4168 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4169 cErrors++;
4170 continue;
4171 }
4172 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4173 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4174 {
4175 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4176 // (This problem will go away when/if we shadow multiple CR3s.)
4177 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4178 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4179 cErrors++;
4180 continue;
4181 }
4182 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4183 {
4184 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4185 GCPtr, (uint64_t)PdeDst.u));
4186 cErrors++;
4187 continue;
4188 }
4189
4190 /* iterate the page table. */
4191# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4192 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4193 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4194# else
4195 const unsigned offPTSrc = 0;
4196# endif
4197 for (unsigned iPT = 0, off = 0;
4198 iPT < RT_ELEMENTS(pPTDst->a);
4199 iPT++, off += PAGE_SIZE)
4200 {
4201 const SHWPTE PteDst = pPTDst->a[iPT];
4202
4203 /* skip not-present entries. */
4204 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4205 continue;
4206 Assert(PteDst.n.u1Present);
4207
4208 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4209 if (!PteSrc.n.u1Present)
4210 {
4211# ifdef IN_RING3
4212 PGMAssertHandlerAndFlagsInSync(pVM);
4213 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4214# endif
4215 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4216 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4217 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4218 cErrors++;
4219 continue;
4220 }
4221
4222 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4223# if 1 /** @todo sync accessed bit properly... */
4224 fIgnoreFlags |= X86_PTE_A;
4225# endif
4226
4227 /* match the physical addresses */
4228 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4229 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4230
4231# ifdef IN_RING3
4232 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4233 if (RT_FAILURE(rc))
4234 {
4235 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4236 {
4237 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4238 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4239 cErrors++;
4240 continue;
4241 }
4242 }
4243 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4244 {
4245 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4246 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4247 cErrors++;
4248 continue;
4249 }
4250# endif
4251
4252 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4253 if (!pPhysPage)
4254 {
4255# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4256 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4257 {
4258 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4259 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4260 cErrors++;
4261 continue;
4262 }
4263# endif
4264 if (PteDst.n.u1Write)
4265 {
4266 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4267 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4268 cErrors++;
4269 }
4270 fIgnoreFlags |= X86_PTE_RW;
4271 }
4272 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4273 {
4274 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4275 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4276 cErrors++;
4277 continue;
4278 }
4279
4280 /* flags */
4281 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4282 {
4283 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4284 {
4285 if (PteDst.n.u1Write)
4286 {
4287 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4288 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4289 cErrors++;
4290 continue;
4291 }
4292 fIgnoreFlags |= X86_PTE_RW;
4293 }
4294 else
4295 {
4296 if (PteDst.n.u1Present)
4297 {
4298 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4299 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4300 cErrors++;
4301 continue;
4302 }
4303 fIgnoreFlags |= X86_PTE_P;
4304 }
4305 }
4306 else
4307 {
4308 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4309 {
4310 if (PteDst.n.u1Write)
4311 {
4312 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4313 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4314 cErrors++;
4315 continue;
4316 }
4317 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4318 {
4319 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4320 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4321 cErrors++;
4322 continue;
4323 }
4324 if (PteDst.n.u1Dirty)
4325 {
4326 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4327 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4328 cErrors++;
4329 }
4330# if 0 /** @todo sync access bit properly... */
4331 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4332 {
4333 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4334 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4335 cErrors++;
4336 }
4337 fIgnoreFlags |= X86_PTE_RW;
4338# else
4339 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4340# endif
4341 }
4342 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4343 {
4344 /* access bit emulation (not implemented). */
4345 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4346 {
4347 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4348 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4349 cErrors++;
4350 continue;
4351 }
4352 if (!PteDst.n.u1Accessed)
4353 {
4354 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4355 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4356 cErrors++;
4357 }
4358 fIgnoreFlags |= X86_PTE_P;
4359 }
4360# ifdef DEBUG_sandervl
4361 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4362# endif
4363 }
4364
4365 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4366 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4367 )
4368 {
4369 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4370 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4371 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4372 cErrors++;
4373 continue;
4374 }
4375 } /* foreach PTE */
4376 }
4377 else
4378 {
4379 /*
4380 * Big Page.
4381 */
4382 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4383 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4384 {
4385 if (PdeDst.n.u1Write)
4386 {
4387 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4388 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4389 cErrors++;
4390 continue;
4391 }
4392 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4393 {
4394 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4395 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4396 cErrors++;
4397 continue;
4398 }
4399# if 0 /** @todo sync access bit properly... */
4400 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4401 {
4402 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4403 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4404 cErrors++;
4405 }
4406 fIgnoreFlags |= X86_PTE_RW;
4407# else
4408 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4409# endif
4410 }
4411 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4412 {
4413 /* access bit emulation (not implemented). */
4414 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4415 {
4416 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4417 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4418 cErrors++;
4419 continue;
4420 }
4421 if (!PdeDst.n.u1Accessed)
4422 {
4423 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4424 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4425 cErrors++;
4426 }
4427 fIgnoreFlags |= X86_PTE_P;
4428 }
4429
4430 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4431 {
4432 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4433 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4434 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4435 cErrors++;
4436 }
4437
4438 /* iterate the page table. */
4439 for (unsigned iPT = 0, off = 0;
4440 iPT < RT_ELEMENTS(pPTDst->a);
4441 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4442 {
4443 const SHWPTE PteDst = pPTDst->a[iPT];
4444
4445 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4446 {
4447 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4448 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4449 cErrors++;
4450 }
4451
4452 /* skip not-present entries. */
4453 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4454 continue;
4455
4456 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4457
4458 /* match the physical addresses */
4459 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4460
4461# ifdef IN_RING3
4462 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4463 if (RT_FAILURE(rc))
4464 {
4465 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4466 {
4467 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4468 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4469 cErrors++;
4470 }
4471 }
4472 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4473 {
4474 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4475 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4476 cErrors++;
4477 continue;
4478 }
4479# endif
4480 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4481 if (!pPhysPage)
4482 {
4483# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4484 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4485 {
4486 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4487 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4488 cErrors++;
4489 continue;
4490 }
4491# endif
4492 if (PteDst.n.u1Write)
4493 {
4494 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4495 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4496 cErrors++;
4497 }
4498 fIgnoreFlags |= X86_PTE_RW;
4499 }
4500 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4501 {
4502 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4503 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4504 cErrors++;
4505 continue;
4506 }
4507
4508 /* flags */
4509 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4510 {
4511 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4512 {
4513 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4514 {
4515 if (PteDst.n.u1Write)
4516 {
4517 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4518 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4519 cErrors++;
4520 continue;
4521 }
4522 fIgnoreFlags |= X86_PTE_RW;
4523 }
4524 }
4525 else
4526 {
4527 if (PteDst.n.u1Present)
4528 {
4529 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4530 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4531 cErrors++;
4532 continue;
4533 }
4534 fIgnoreFlags |= X86_PTE_P;
4535 }
4536 }
4537
4538 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4539 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4540 )
4541 {
4542 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4543 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4544 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4545 cErrors++;
4546 continue;
4547 }
4548 } /* for each PTE */
4549 }
4550 }
4551 /* not present */
4552
4553 } /* for each PDE */
4554
4555 } /* for each PDPTE */
4556
4557 } /* for each PML4E */
4558
4559# ifdef DEBUG
4560 if (cErrors)
4561 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4562# endif
4563
4564#endif /* GST == 32BIT, PAE or AMD64 */
4565 return cErrors;
4566
4567#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4568}
4569#endif /* VBOX_STRICT */
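/*
 * Illustrative sketch only (#if 0, never compiled): the masked comparison
 * pattern AssertCR3 uses, e.g. (PteSrc.u & ~fIgnoreFlags) != (PteDst.u &
 * ~fIgnoreFlags). Bits covered by the ignore mask (physical address, A/D,
 * caching attributes) never count as a mismatch. Names are invented for
 * this example.
 */
#if 0
# include <stdint.h>
# include <stdbool.h>

static bool pteFlagsMatch(uint64_t uSrc, uint64_t uDst, uint64_t fIgnore)
{
    return (uSrc & ~fIgnore) == (uDst & ~fIgnore);
}
#endif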
4570
4571
4572/**
4573 * Sets up the CR3 for shadow paging
4574 *
4575 * @returns Strict VBox status code.
4576 * @retval VINF_SUCCESS.
4577 *
4578 * @param pVM VM handle.
4579 * @param GCPhysCR3 The physical address in the CR3 register.
4580 */
4581PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3)
4582{
4583 /* Update guest paging info. */
4584#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4585 || PGM_GST_TYPE == PGM_TYPE_PAE \
4586 || PGM_GST_TYPE == PGM_TYPE_AMD64
4587
4588 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4589
4590 /*
4591 * Map the page CR3 points at.
4592 */
4593 RTHCPTR HCPtrGuestCR3;
4594 RTHCPHYS HCPhysGuestCR3;
4595# ifdef VBOX_WITH_NEW_PHYS_CODE
4596 /** @todo this needs some reworking. current code is just a big hack. */
4597# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4598# if 1 /* temp hack */
4599 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4600 return VINF_PGM_SYNC_CR3;
4601# else
4602 AssertFailedReturn(VERR_INTERNAL_ERROR);
4603# endif
4604 int rc = VERR_INTERNAL_ERROR;
4605# else
4606 pgmLock(pVM);
4607 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4608 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4609 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4610 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4611 pgmUnlock(pVM);
4612# endif
4613# else /* !VBOX_WITH_NEW_PHYS_CODE */
4614 int rc = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhysCR3 & GST_CR3_PAGE_MASK, &HCPtrGuestCR3, &HCPhysGuestCR3);
4615# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4616 if (RT_SUCCESS(rc))
4617 {
4618 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4619 if (RT_SUCCESS(rc))
4620 {
4621# ifdef IN_RC
4622 PGM_INVL_PG(pVM->pgm.s.GCPtrCR3Mapping);
4623# endif
4624# if PGM_GST_TYPE == PGM_TYPE_32BIT
4625 pVM->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4626# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4627 pVM->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4628# endif
4629 pVM->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4630
4631# elif PGM_GST_TYPE == PGM_TYPE_PAE
4632 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4633 pVM->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4634# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4635 pVM->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4636# endif
4637 pVM->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4638 Log(("Cached mapping %RRv\n", pVM->pgm.s.pGstPaePdptRC));
4639
4640 /*
4641 * Map the 4 PDs too.
4642 */
4643 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVM->pgm.s);
4644 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4645 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4646 {
4647 if (pGuestPDPT->a[i].n.u1Present)
4648 {
4649 RTHCPTR HCPtr;
4650 RTHCPHYS HCPhys;
4651 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4652# ifdef VBOX_WITH_NEW_PHYS_CODE
4653# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4654 AssertFailedReturn(VERR_INTERNAL_ERROR);
4655 int rc2 = VERR_INTERNAL_ERROR;
4656# else
4657 pgmLock(pVM);
4658 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4659 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4660 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4661 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4662 pgmUnlock(pVM);
4663# endif
4664# else /* !VBOX_WITH_NEW_PHYS_CODE */
4665 int rc2 = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
4666# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4667 if (RT_SUCCESS(rc2))
4668 {
4669 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4670 AssertRCReturn(rc, rc);
4671
4672 pVM->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4673# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4674 pVM->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4675# endif
4676 pVM->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4677 pVM->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4678 PGM_INVL_PG(GCPtr); /** @todo This ends up calling HWACCMInvalidatePage, is that correct? */
4679 continue;
4680 }
4681 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4682 }
4683
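            /* Entry not present or the mapping attempt failed: invalidate the cached
             * page directory pointers for this PDPT entry. */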
4684 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4685# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4686 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4687# endif
4688 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4689 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4690 PGM_INVL_PG(GCPtr); /** @todo this shouldn't be necessary? */
4691 }
4692
4693# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4694 pVM->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4695# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4696 pVM->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4697# endif
4698# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4699 if (!HWACCMIsNestedPagingActive(pVM))
4700 {
4701 /*
4702 * Update the shadow root page as well since that's not fixed.
4703 */
4704 /** @todo Move this into PGMAllBth.h. */
4705 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4706 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4707 {
4708 /* It might have been freed already by a pool flush (see e.g. PGMR3MappingsUnfix). */
4709 /** @todo Coordinate this better with the pool. */
4710 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3)->enmKind != PGMPOOLKIND_FREE)
4711 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), PGMPOOL_IDX_AMD64_CR3, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->GCPhys >> PAGE_SHIFT);
4712 pVM->pgm.s.pShwPageCR3R3 = 0;
4713 pVM->pgm.s.pShwPageCR3R0 = 0;
4714 pVM->pgm.s.pShwRootR3 = 0;
4715# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4716 pVM->pgm.s.pShwRootR0 = 0;
4717# endif
4718 pVM->pgm.s.HCPhysShwCR3 = 0;
4719 }
4720
4721 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4722 rc = pgmPoolAlloc(pVM, GCPhysCR3, PGMPOOLKIND_64BIT_PML4, PGMPOOL_IDX_AMD64_CR3, GCPhysCR3 >> PAGE_SHIFT, &pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4723 if (rc == VERR_PGM_POOL_FLUSHED)
4724 {
4725 Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
4726 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4727 return VINF_PGM_SYNC_CR3;
4728 }
4729 AssertRCReturn(rc, rc);
4730# ifdef IN_RING0
4731 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4732# else
4733 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4734# endif
4735 pVM->pgm.s.pShwRootR3 = (R3PTRTYPE(void *))pVM->pgm.s.CTX_SUFF(pShwPageCR3)->pvPageR3;
4736 Assert(pVM->pgm.s.pShwRootR3);
4737# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4738 pVM->pgm.s.pShwRootR0 = (R0PTRTYPE(void *))PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4739# endif
4740 pVM->pgm.s.HCPhysShwCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3)->Core.Key;
4741 rc = VINF_SUCCESS; /* clear it - pgmPoolAlloc returns hints. */
4742 }
4743# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
4744# endif
4745 }
4746 else
4747 AssertMsgFailed(("rc=%Rrc GCPhysCR3=%RGp\n", rc, GCPhysCR3));
4748 }
4749 else
4750 AssertMsgFailed(("rc=%Rrc GCPhysCR3=%RGp\n", rc, GCPhysCR3));
4751
4752#else /* prot/real stub */
4753 int rc = VINF_SUCCESS;
4754#endif
4755
4756#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
4757 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4758# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4759 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4760 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4761 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4762 && PGM_GST_TYPE != PGM_TYPE_PROT))
4763
4764 Assert(!HWACCMIsNestedPagingActive(pVM));
4765
4766 /*
4767 * Update the shadow root page as well since that's not fixed.
4768 */
4769 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4770 PPGMPOOLPAGE pOldShwPageCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
4771 uint32_t iOldShwUserTable = pVM->pgm.s.iShwUserTable;
4772 uint32_t iOldShwUser = pVM->pgm.s.iShwUser;
4773 PPGMPOOLPAGE pNewShwPageCR3;
4774
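    /* Allocate the new shadow CR3 root page from the pool, keyed on the guest CR3
     * physical address. VERR_PGM_POOL_FLUSHED means the pool had to be flushed to
     * make room; in that case bail out and let the pending SyncCR3 rebuild things. */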
4775 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4776 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4777 if (rc == VERR_PGM_POOL_FLUSHED)
4778 {
4779 Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
4780 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4781 return VINF_PGM_SYNC_CR3;
4782 }
4783 AssertRCReturn(rc, rc);
4784 rc = VINF_SUCCESS;
4785
4786 /* Mark the page as locked; disallow flushing. */
4787 pgmPoolLockPage(pPool, pNewShwPageCR3);
4788
4789# ifdef IN_RC
4790 /* Note: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4791 bool fLog = VMMGCLogDisable(pVM);
4792# endif
4793
4794 pVM->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4795 pVM->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4796 pVM->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4797# ifdef IN_RING0
4798 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4799 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4800# elif defined(IN_RC)
4801 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4802 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4803# else
4804 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4805 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4806# endif
4807
4808# ifndef PGM_WITHOUT_MAPPINGS
4809 /* Apply all hypervisor mappings to the new CR3.
4810 * Note that SyncCR3 will be executed whenever the guest changes CR3 while a
4811 * paging mode is active, which makes sure we check the new CR3 root for mapping conflicts.
4812 */
4813# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4814 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL) || VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4815# endif
4816 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4817 AssertRCReturn(rc, rc);
4818# endif
4819
4820 /* Set the current hypervisor CR3. */
4821 CPUMSetHyperCR3(pVM, PGMGetHyperCR3(pVM));
4822
4823# ifdef IN_RC
4824 VMMGCLogRestore(pVM, fLog);
4825# endif
4826
4827 /* Clean up the old CR3 root. */
4828 if (pOldShwPageCR3)
4829 {
4830 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4831# ifndef PGM_WITHOUT_MAPPINGS
4832 /* Remove the hypervisor mappings from the shadow page table. */
4833 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4834# endif
4835 /* Mark the page as unlocked; allow flushing again. */
4836 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4837
4838 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4839 }
4840
4841# endif
4842#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
4843
4844 return rc;
4845}
4846
4847/**
4848 * Unmaps the guest CR3 mapping and, where applicable, releases the shadow CR3 root.
4849 *
4850 * @returns VBox status code; no special status codes are returned.
4851 * @param pVM VM handle.
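 *
 * @remarks Invocation sketch, for orientation only (assumed caller shape; the
 *          actual call sites go through the per-mode function pointers):
 * @code
 *      int rc = PGM_BTH_NAME(UnmapCR3)(pVM);
 *      AssertRC(rc);
 * @endcode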
4852 */
4853PGM_BTH_DECL(int, UnmapCR3)(PVM pVM)
4854{
4855 LogFlow(("UnmapCR3\n"));
4856
4857 int rc = VINF_SUCCESS;
4858
4859 /* Update guest paging info. */
4860#if PGM_GST_TYPE == PGM_TYPE_32BIT
4861 pVM->pgm.s.pGst32BitPdR3 = 0;
4862# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4863 pVM->pgm.s.pGst32BitPdR0 = 0;
4864# endif
4865 pVM->pgm.s.pGst32BitPdRC = 0;
4866
4867#elif PGM_GST_TYPE == PGM_TYPE_PAE
4868 pVM->pgm.s.pGstPaePdptR3 = 0;
4869# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4870 pVM->pgm.s.pGstPaePdptR0 = 0;
4871# endif
4872 pVM->pgm.s.pGstPaePdptRC = 0;
4873 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4874 {
4875 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4876# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4877 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4878# endif
4879 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4880 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4881 }
4882
4883#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4884 pVM->pgm.s.pGstAmd64Pml4R3 = 0;
4885# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4886 pVM->pgm.s.pGstAmd64Pml4R0 = 0;
4887# endif
4888# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
4889 if (!HWACCMIsNestedPagingActive(pVM))
4890 {
4891 pVM->pgm.s.pShwRootR3 = 0;
4892# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4893 pVM->pgm.s.pShwRootR0 = 0;
4894# endif
4895 pVM->pgm.s.HCPhysShwCR3 = 0;
4896 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4897 {
4898 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4899 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), PGMPOOL_IDX_AMD64_CR3, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->GCPhys >> PAGE_SHIFT);
4900 pVM->pgm.s.pShwPageCR3R3 = 0;
4901 pVM->pgm.s.pShwPageCR3R0 = 0;
4902 }
4903 }
4904# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
4905
4906#else /* prot/real mode stub */
4907 /* nothing to do */
4908#endif
4909
4910#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4911 /* Update shadow paging info. */
4912# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4913 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4914 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4915
4916# if PGM_GST_TYPE != PGM_TYPE_REAL
4917 Assert(!HWACCMIsNestedPagingActive(pVM));
4918# endif
4919
4920# ifndef PGM_WITHOUT_MAPPINGS
4921 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4922 /* Remove the hypervisor mappings from the shadow page table. */
4923 pgmMapDeactivateCR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4924# endif
4925
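    /* Unlock the shadow CR3 root page and hand it back to the pool, then clear the
     * cached per-context pointers and pool user indexes so nothing references the
     * stale root. */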
4926 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4927 {
4928 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4929
4930 Assert(pVM->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4931
4932 /* Mark the page as unlocked; allow flushing again. */
4933 pgmPoolUnlockPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4934
4935 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), pVM->pgm.s.iShwUser, pVM->pgm.s.iShwUserTable);
4936 pVM->pgm.s.pShwPageCR3R3 = 0;
4937 pVM->pgm.s.pShwPageCR3R0 = 0;
4938 pVM->pgm.s.pShwPageCR3RC = 0;
4939 pVM->pgm.s.iShwUser = 0;
4940 pVM->pgm.s.iShwUserTable = 0;
4941 }
4942# endif
4943#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY && !IN_RC */
4944
4945 return rc;
4946}
4947