VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@17693

Last change on this file since 17693 was 17667, checked in by vboxsync on 2009-03-11

pgmPoolAlloc no longer fails with non-fatal errors.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 188.9 KB
1/* $Id: PGMAllBth.h 17667 2009-03-11 09:35:22Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVM pVM);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 * @param pVM VM Handle.
78 * @param uErr The trap error code.
79 * @param pRegFrame Trap register frame.
80 * @param pvFault The fault address.
81 */
82PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
83{
84# if defined(IN_RC) && defined(VBOX_STRICT)
85 PGMDynCheckLocks(pVM);
86# endif
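 /*
  * Rough flow of this handler (as implemented below):
  *   1. Fetch the guest and shadow page directory entries for the fault address.
  *   2. Let CheckPageFault deal with dirty/accessed bit tracking and obvious guest traps.
  *   3. Lazily sync the page table (SyncPT) if the shadow PDE isn't present yet.
  *   4. Check the hypervisor mappings (raw mode) for conflicts and hypervisor handlers.
  *   5. Dispatch physical and virtual access handlers for monitored pages.
  *   6. Sync out-of-sync pages (SyncPage) and deal with obsolete write monitoring.
  *   7. Whatever is left is either a genuine guest trap or something for the recompiler.
  */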
87
88# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
89 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
90 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
91
92# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
93 /*
94 * Hide the instruction fetch trap indicator for now.
95 */
96 /** @todo NXE will change this and we must fix NXE in the switcher too! */
97 if (uErr & X86_TRAP_PF_ID)
98 {
99 uErr &= ~X86_TRAP_PF_ID;
100 TRPMSetErrorCode(pVM, uErr);
101 }
102# endif
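 /*
  * Reminder on the #PF error code bits used throughout this function: X86_TRAP_PF_P is
  * set for protection violations and clear for not-present pages, X86_TRAP_PF_RW
  * distinguishes writes from reads, X86_TRAP_PF_US flags user-mode accesses and
  * X86_TRAP_PF_ID flags instruction fetches.
  */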
103
104 /*
105 * Get PDs.
106 */
107 int rc;
108# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
109# if PGM_GST_TYPE == PGM_TYPE_32BIT
110 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
111 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
112
113# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
114
115# if PGM_GST_TYPE == PGM_TYPE_PAE
116 unsigned iPDSrc;
117 X86PDPE PdpeSrc;
118 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
119
120# elif PGM_GST_TYPE == PGM_TYPE_AMD64
121 unsigned iPDSrc;
122 PX86PML4E pPml4eSrc;
123 X86PDPE PdpeSrc;
124 PGSTPD pPDSrc;
125
126 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
127 Assert(pPml4eSrc);
128# endif
129
130 /* Quick check for a valid guest trap. (PAE & AMD64) */
131 if (!pPDSrc)
132 {
133# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
134 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
135# else
136 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
137# endif
138 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
139 TRPMSetErrorCode(pVM, uErr);
140 return VINF_EM_RAW_GUEST_TRAP;
141 }
142# endif
143
144# else /* !PGM_WITH_PAGING */
145 PGSTPD pPDSrc = NULL;
146 const unsigned iPDSrc = 0;
147# endif /* !PGM_WITH_PAGING */
148
149
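 /*
  * Get the shadow PD and the entry index for the fault address. Depending on the shadow
  * paging mode this may first have to sync/allocate the shadow PDPT or PML4 levels
  * (pgmShwSyncPaePDPtr / pgmShwSyncLongModePDPtr / pgmShwGetEPTPDPtr).
  */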
150# if PGM_SHW_TYPE == PGM_TYPE_32BIT
151 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
152 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
153
154# elif PGM_SHW_TYPE == PGM_TYPE_PAE
155 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
156
157 PX86PDPAE pPDDst;
158# if PGM_GST_TYPE != PGM_TYPE_PAE
159 X86PDPE PdpeSrc;
160
161 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
162 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
163# endif
164 rc = pgmShwSyncPaePDPtr(pVM, pvFault, &PdpeSrc, &pPDDst);
165 if (rc != VINF_SUCCESS)
166 {
167 AssertRC(rc);
168 return rc;
169 }
170 Assert(pPDDst);
171
172# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
173 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
174 PX86PDPAE pPDDst;
175# if PGM_GST_TYPE == PGM_TYPE_PROT
176 /* AMD-V nested paging */
177 X86PML4E Pml4eSrc;
178 X86PDPE PdpeSrc;
179 PX86PML4E pPml4eSrc = &Pml4eSrc;
180
181 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
182 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
183 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
184# endif
185
186 rc = pgmShwSyncLongModePDPtr(pVM, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
187 if (rc != VINF_SUCCESS)
188 {
189 AssertRC(rc);
190 return rc;
191 }
192 Assert(pPDDst);
193
194# elif PGM_SHW_TYPE == PGM_TYPE_EPT
195 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
196 PEPTPD pPDDst;
197
198 rc = pgmShwGetEPTPDPtr(pVM, pvFault, NULL, &pPDDst);
199 if (rc != VINF_SUCCESS)
200 {
201 AssertRC(rc);
202 return rc;
203 }
204 Assert(pPDDst);
205# endif
206
207# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
208 /*
209 * If we successfully correct the write protection fault due to dirty bit
210 * tracking, or this page fault is a genuine one, then return immediately.
211 */
212 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
213 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
214 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
215 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
216 || rc == VINF_EM_RAW_GUEST_TRAP)
217 {
218 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
219 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
220 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
221 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
222 }
223
224 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0ePD[iPDSrc]);
225# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
226
227 /*
228 * A common case is the not-present error caused by lazy page table syncing.
229 *
230 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
231 * so we can safely assume that the shadow PT is present when calling SyncPage later.
232 *
233 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
234 * of mapping conflict and defer to SyncCR3 in R3.
235 * (Again, we do NOT support access handlers for non-present guest pages.)
236 *
237 */
238# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
239 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
240# else
241 GSTPDE PdeSrc;
242 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
243 PdeSrc.n.u1Present = 1;
244 PdeSrc.n.u1Write = 1;
245 PdeSrc.n.u1Accessed = 1;
246 PdeSrc.n.u1User = 1;
247# endif
248 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
249 && !pPDDst->a[iPDDst].n.u1Present
250 && PdeSrc.n.u1Present
251 )
252
253 {
254 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2SyncPT; });
255 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
256 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
257 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, pvFault);
258 if (RT_SUCCESS(rc))
259 {
260 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
261 return rc;
262 }
263 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
264 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
265 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
266 return VINF_PGM_SYNC_CR3;
267 }
268
269# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
270 /*
271 * Check if this address is within any of our mappings.
272 *
273 * This is *very* fast and it's gonna save us a bit of effort below and prevent
274 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
275 * (BTW, it's impossible to have physical access handlers in a mapping.)
276 */
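 /* Note: in raw mode the hypervisor shares the guest address space, so a guest access
    can land inside one of our own mappings; that is either an undetected conflict or a
    hit on a hypervisor virtual handler, both of which are dealt with below. */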
277 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
278 {
279 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
280 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
281 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
282 {
283 if (pvFault < pMapping->GCPtr)
284 break;
285 if (pvFault - pMapping->GCPtr < pMapping->cb)
286 {
287 /*
288 * The first thing we check is if we've got an undetected conflict.
289 */
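 /* Walk the guest PDEs covered by this mapping; if the guest has put something there
    we have a conflict and leave it to SyncCR3 (ring-3) to sort out. */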
290 if (!pVM->pgm.s.fMappingsFixed)
291 {
292 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
293 while (iPT-- > 0)
294 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
295 {
296 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eConflicts);
297 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
298 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
299 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
300 return VINF_PGM_SYNC_CR3;
301 }
302 }
303
304 /*
305 * Check if the fault address is in a virtual page access handler range.
306 */
307 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
308 if ( pCur
309 && pvFault - pCur->Core.Key < pCur->cb
310 && uErr & X86_TRAP_PF_RW)
311 {
312# ifdef IN_RC
313 STAM_PROFILE_START(&pCur->Stat, h);
314 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
315 STAM_PROFILE_STOP(&pCur->Stat, h);
316# else
317 AssertFailed();
318 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
319# endif
320 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersMapping);
321 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
322 return rc;
323 }
324
325 /*
326 * Pretend we're not here and let the guest handle the trap.
327 */
328 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
329 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFMapping);
330 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
331 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
332 return VINF_EM_RAW_GUEST_TRAP;
333 }
334 }
335 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
336 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
337# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
338
339 /*
340 * Check if this fault address is flagged for special treatment,
341 * which means we'll have to figure out the physical address and
342 * check flags associated with it.
343 *
344 * ASSUME that we can limit any special access handling to pages
345 * in page tables which the guest believes to be present.
346 */
347 if (PdeSrc.n.u1Present)
348 {
349 RTGCPHYS GCPhys = NIL_RTGCPHYS;
350
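 /*
  * Resolve the guest physical address of the fault: a big (2/4 MB) page yields it
  * directly from the PDE, a 4 KB page requires walking the guest page table, and
  * without guest paging the fault address already is the physical address.
  */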
351# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
352# if PGM_GST_TYPE == PGM_TYPE_AMD64
353 bool fBigPagesSupported = true;
354# else
355 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
356# endif
357 if ( PdeSrc.b.u1Size
358 && fBigPagesSupported)
359 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
360 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
361 else
362 {
363 PGSTPT pPTSrc;
364 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
365 if (RT_SUCCESS(rc))
366 {
367 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
368 if (pPTSrc->a[iPTESrc].n.u1Present)
369 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
370 }
371 }
372# else
373 /* No paging so the fault address is the physical address */
374 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
375# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
376
377 /*
378 * If we have a GC address we'll check if it has any flags set.
379 */
380 if (GCPhys != NIL_RTGCPHYS)
381 {
382 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
383
384 PPGMPAGE pPage;
385 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
386 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
387 {
388 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
389 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
390 {
391 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
392 {
393 /*
394 * Physical page access handler.
395 */
396 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
397 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
398 if (pCur)
399 {
400# ifdef PGM_SYNC_N_PAGES
401 /*
402 * If the region is write protected and we got a page not present fault, then sync
403 * the pages. If the fault was caused by a read, then restart the instruction.
404 * In case of write access continue to the GC write handler.
405 *
406 * ASSUMES that there is only one handler per page or that they have similar write properties.
407 */
408 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
409 && !(uErr & X86_TRAP_PF_P))
410 {
411 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
412 if ( RT_FAILURE(rc)
413 || !(uErr & X86_TRAP_PF_RW)
414 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
415 {
416 AssertRC(rc);
417 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
418 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
419 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
420 return rc;
421 }
422 }
423# endif
424
425 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
426 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
427 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
428
429# if defined(IN_RC) || defined(IN_RING0)
430 if (pCur->CTX_SUFF(pfnHandler))
431 {
432 STAM_PROFILE_START(&pCur->Stat, h);
433 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
434 STAM_PROFILE_STOP(&pCur->Stat, h);
435 }
436 else
437# endif
438 rc = VINF_EM_RAW_EMULATE_INSTR;
439 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersPhysical);
440 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
441 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndPhys; });
442 return rc;
443 }
444 }
445# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
446 else
447 {
448# ifdef PGM_SYNC_N_PAGES
449 /*
450 * If the region is write protected and we got a page not present fault, then sync
451 * the pages. If the fault was caused by a read, then restart the instruction.
452 * In case of write access continue to the GC write handler.
453 */
454 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
455 && !(uErr & X86_TRAP_PF_P))
456 {
457 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
458 if ( RT_FAILURE(rc)
459 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
460 || !(uErr & X86_TRAP_PF_RW))
461 {
462 AssertRC(rc);
463 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
464 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
465 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
466 return rc;
467 }
468 }
469# endif
470 /*
471 * Ok, it's a virtual page access handler.
472 *
473 * Since it's faster to search by address, we'll do that first
474 * and then retry by GCPhys if that fails.
475 */
476 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
477 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
478 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
479 */
480 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
481 if (pCur)
482 {
483 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
484 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
485 || !(uErr & X86_TRAP_PF_P)
486 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
487 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
488
489 if ( pvFault - pCur->Core.Key < pCur->cb
490 && ( uErr & X86_TRAP_PF_RW
491 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
492 {
493# ifdef IN_RC
494 STAM_PROFILE_START(&pCur->Stat, h);
495 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
496 STAM_PROFILE_STOP(&pCur->Stat, h);
497# else
498 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
499# endif
500 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtual);
501 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
502 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
503 return rc;
504 }
505 /* Unhandled part of a monitored page */
506 }
507 else
508 {
509 /* Check by physical address. */
510 PPGMVIRTHANDLER pCur;
511 unsigned iPage;
512 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
513 &pCur, &iPage);
514 Assert(RT_SUCCESS(rc) || !pCur);
515 if ( pCur
516 && ( uErr & X86_TRAP_PF_RW
517 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
518 {
519 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
520# ifdef IN_RC
521 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
522 Assert(off < pCur->cb);
523 STAM_PROFILE_START(&pCur->Stat, h);
524 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
525 STAM_PROFILE_STOP(&pCur->Stat, h);
526# else
527 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
528# endif
529 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
530 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
531 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
532 return rc;
533 }
534 }
535 }
536# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
537
538 /*
539 * There is a handled area of the page, but this fault doesn't belong to it.
540 * We must emulate the instruction.
541 *
542 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
543 * we first check if this was a page-not-present fault for a page with only
544 * write access handlers. Restart the instruction if it wasn't a write access.
545 */
546 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersUnhandled);
547
548 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
549 && !(uErr & X86_TRAP_PF_P))
550 {
551 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
552 if ( RT_FAILURE(rc)
553 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
554 || !(uErr & X86_TRAP_PF_RW))
555 {
556 AssertRC(rc);
557 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
558 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
559 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
560 return rc;
561 }
562 }
563
564 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stages of kernel booting in Ubuntu 6.06,
565 * which writes to an unhandled part of the LDT page several million times.
566 */
567 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
568 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
569 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
570 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndUnhandled; });
571 return rc;
572 } /* if any kind of handler */
573
574# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
575 if (uErr & X86_TRAP_PF_P)
576 {
577 /*
578 * The page isn't marked, but it might still be monitored by a virtual page access handler.
579 * (ASSUMES no temporary disabling of virtual handlers.)
580 */
581 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
582 * we should correct both the shadow page table and physical memory flags, and not only check for
583 * accesses within the handler region but for access to pages with virtual handlers. */
584 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
585 if (pCur)
586 {
587 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
588 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
589 || !(uErr & X86_TRAP_PF_P)
590 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
591 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
592
593 if ( pvFault - pCur->Core.Key < pCur->cb
594 && ( uErr & X86_TRAP_PF_RW
595 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
596 {
597# ifdef IN_RC
598 STAM_PROFILE_START(&pCur->Stat, h);
599 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
600 STAM_PROFILE_STOP(&pCur->Stat, h);
601# else
602 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
603# endif
604 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
605 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
606 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
607 return rc;
608 }
609 }
610 }
611# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
612 }
613 else
614 {
615 /*
616 * When the guest accesses invalid physical memory (e.g. probing
617 * of RAM or accessing a remapped MMIO range), then we'll fall
618 * back to the recompiler to emulate the instruction.
619 */
620 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
621 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersInvalid);
622 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
623 return VINF_EM_RAW_EMULATE_INSTR;
624 }
625
626 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
627
628# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
629 /*
630 * We are here only if the page is present in the guest page tables and
631 * the trap is not handled by our handlers.
632 *
633 * Check it for page out-of-sync situation.
634 */
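 /* "Out of sync" means the shadow page table lags behind the guest's because we only
    sync lazily; SyncPage below pulls the guest PTE(s) into the shadow PT so the access
    can simply be restarted. */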
635 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
636
637 if (!(uErr & X86_TRAP_PF_P))
638 {
639 /*
640 * Page is not present in our page tables.
641 * Try to sync it!
642 * BTW, fPageShw is invalid in this branch!
643 */
644 if (uErr & X86_TRAP_PF_US)
645 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
646 else /* supervisor */
647 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
648
649# if defined(LOG_ENABLED) && !defined(IN_RING0)
650 RTGCPHYS GCPhys;
651 uint64_t fPageGst;
652 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
653 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
654 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
655# endif /* LOG_ENABLED */
656
657# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
658 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
659 {
660 uint64_t fPageGst;
661 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
662 if ( RT_SUCCESS(rc)
663 && !(fPageGst & X86_PTE_US))
664 {
665 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
666 if ( pvFault == (RTGCPTR)pRegFrame->eip
667 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
668# ifdef CSAM_DETECT_NEW_CODE_PAGES
669 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
670 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
671# endif /* CSAM_DETECT_NEW_CODE_PAGES */
672 )
673 {
674 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
675 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
676 if (rc != VINF_SUCCESS)
677 {
678 /*
679 * CSAM needs to perform a job in ring 3.
680 *
681 * Sync the page before going to the host context; otherwise we'll end up in a loop if
682 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
683 */
684 LogFlow(("CSAM ring 3 job\n"));
685 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
686 AssertRC(rc2);
687
688 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
689 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2CSAM; });
690 return rc;
691 }
692 }
693# ifdef CSAM_DETECT_NEW_CODE_PAGES
694 else if ( uErr == X86_TRAP_PF_RW
695 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
696 && pRegFrame->ecx < 0x10000)
697 {
698 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
699 * to detect loading of new code pages.
700 */
701
702 /*
703 * Decode the instruction.
704 */
705 RTGCPTR PC;
706 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
707 if (rc == VINF_SUCCESS)
708 {
709 DISCPUSTATE Cpu;
710 uint32_t cbOp;
711 rc = EMInterpretDisasOneEx(pVM, PC, pRegFrame, &Cpu, &cbOp);
712
713 /* For now we'll restrict this to rep movsw/d instructions */
714 if ( rc == VINF_SUCCESS
715 && Cpu.pCurInstr->opcode == OP_MOVSWD
716 && (Cpu.prefix & PREFIX_REP))
717 {
718 CSAMMarkPossibleCodePage(pVM, pvFault);
719 }
720 }
721 }
722# endif /* CSAM_DETECT_NEW_CODE_PAGES */
723
724 /*
725 * Mark this page as safe.
726 */
727 /** @todo not correct for pages that contain both code and data!! */
728 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
729 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
730 }
731 }
732# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
733 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
734 if (RT_SUCCESS(rc))
735 {
736 /* The page was successfully synced, return to the guest. */
737 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
738 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSync; });
739 return VINF_SUCCESS;
740 }
741 }
742 else /* uErr & X86_TRAP_PF_P: */
743 {
744 /*
745 * Write protected pages are made writable when the guest makes the first
746 * write to them. This happens for pages that are shared, write monitored
747 * and not yet allocated.
748 *
749 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
750 * to physically monitored regions that are no longer valid.
751 * Assume for now it only applies to the read/write flag.
752 */
753 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
754 {
755# ifdef VBOX_WITH_NEW_PHYS_CODE
756 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
757 {
758 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
759 GCPhys, pPage, pvFault, uErr));
760 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
761 if (rc != VINF_SUCCESS)
762 {
763 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
764 return rc;
765 }
766 }
767 /// @todo count the above case; else
768# endif /* VBOX_WITH_NEW_PHYS_CODE */
769 if (uErr & X86_TRAP_PF_US)
770 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
771 else /* supervisor */
772 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
773
774 /*
775 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
776 * page is not present, which is not true in this case.
777 */
778 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
779 if (RT_SUCCESS(rc))
780 {
781 /*
782 * Page was successfully synced, return to guest.
783 */
784# ifdef VBOX_STRICT
785 RTGCPHYS GCPhys;
786 uint64_t fPageGst;
787 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
788 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
789 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
790
791 uint64_t fPageShw;
792 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
793 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
794# endif /* VBOX_STRICT */
795 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
796 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
797 return VINF_SUCCESS;
798 }
799
800 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
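 /* With guest CR0.WP clear, a supervisor write to a read-only page is legal for the
    guest but it still traps with our shadow page tables, so the write has to be
    interpreted here. */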
801 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
802 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
803 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
804 {
805 uint64_t fPageGst;
806 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
807 if ( RT_SUCCESS(rc)
808 && !(fPageGst & X86_PTE_RW))
809 {
810 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
811 if (RT_SUCCESS(rc))
812 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulInRZ);
813 else
814 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulToR3);
815 return rc;
816 }
817 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
818 }
819 }
820
821# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
822# ifdef VBOX_STRICT
823 /*
824 * Check for VMM page flags vs. Guest page flags consistency.
825 * Currently only for debug purposes.
826 */
827 if (RT_SUCCESS(rc))
828 {
829 /* Get guest page flags. */
830 uint64_t fPageGst;
831 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
832 if (RT_SUCCESS(rc))
833 {
834 uint64_t fPageShw;
835 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
836
837 /*
838 * Compare page flags.
839 * Note: we have AVL, A, D bits desynched.
840 */
841 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
842 ("Page flags mismatch! pvFault=%RGv GCPhys=%RGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
843 }
844 else
845 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
846 }
847 else
848 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
849# endif /* VBOX_STRICT */
850# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
851 }
852 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
853# endif /* PGM_OUT_OF_SYNC_IN_GC */
854 }
855 else /* GCPhys == NIL_RTGCPHYS */
856 {
857 /*
858 * Page not present in Guest OS or invalid page table address.
859 * This is potential virtual page access handler food.
860 *
861 * For the present we'll say that our access handlers don't
862 * work for this case - we've already discarded the page table
863 * not present case which is identical to this.
864 *
865 * When we perchance find we need this, we will probably have AVL
866 * trees (offset based) to operate on and we can measure their speed
867 * against mapping a page table and probably rearrange this handling
868 * a bit. (Like, searching virtual ranges before checking the
869 * physical address.)
870 */
871 }
872 }
873 /* else: !present (guest) */
874
875
876# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
877 /*
878 * Conclusion, this is a guest trap.
879 */
880 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
881 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFUnh);
882 return VINF_EM_RAW_GUEST_TRAP;
883# else
884 /* present, but not a monitored page; perhaps the guest is probing physical memory */
885 return VINF_EM_RAW_EMULATE_INSTR;
886# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
887
888
889# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
890
891 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
892 return VERR_INTERNAL_ERROR;
893# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
894}
895#endif /* !IN_RING3 */
896
897
898/**
899 * Emulation of the invlpg instruction.
900 *
901 *
902 * @returns VBox status code.
903 *
904 * @param pVM VM handle.
905 * @param GCPtrPage Page to invalidate.
906 *
907 * @remark ASSUMES that the guest is updating before invalidating. This order
908 * isn't required by the CPU, so this is speculative and could cause
909 * trouble.
910 *
911 * @todo Flush page or page directory only if necessary!
912 * @todo Add a #define for simply invalidating the page.
913 */
914PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage)
915{
916#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
917 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
918 && PGM_SHW_TYPE != PGM_TYPE_EPT
919 int rc;
920
921 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
922 /*
923 * Get the shadow PD entry and skip out if this PD isn't present.
924 * (Guessing that it is frequent for a shadow PDE to not be present, we do this first.)
925 */
926# if PGM_SHW_TYPE == PGM_TYPE_32BIT
927 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
928 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
929
930 /* Fetch the pgm pool shadow descriptor. */
931 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
932 Assert(pShwPde);
933
934# elif PGM_SHW_TYPE == PGM_TYPE_PAE
935 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
936 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
937
938 /* If the shadow PDPE isn't present, then skip the invalidate. */
939 if (!pPdptDst->a[iPdpt].n.u1Present)
940 {
941 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
942 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
943 return VINF_SUCCESS;
944 }
945
946 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
947 PPGMPOOLPAGE pShwPde;
948 PX86PDPAE pPDDst;
949
950 /* Fetch the pgm pool shadow descriptor. */
951 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
952 AssertRCSuccessReturn(rc, rc);
953 Assert(pShwPde);
954
955 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
956 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
957
958# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
959 /* PML4 */
960 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
961 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
962 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
963 PX86PDPAE pPDDst;
964 PX86PDPT pPdptDst;
965 PX86PML4E pPml4eDst;
966 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
967 if (rc != VINF_SUCCESS)
968 {
969 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
970 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
971 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
972 PGM_INVL_GUEST_TLBS();
973 return VINF_SUCCESS;
974 }
975 Assert(pPDDst);
976
977 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
978 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
979
980 if (!pPdpeDst->n.u1Present)
981 {
982 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
983 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
984 PGM_INVL_GUEST_TLBS();
985 return VINF_SUCCESS;
986 }
987
988# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
989
990 const SHWPDE PdeDst = *pPdeDst;
991 if (!PdeDst.n.u1Present)
992 {
993 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
994 return VINF_SUCCESS;
995 }
996
997# if defined(IN_RC)
998 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
999 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1000# endif
1001
1002 /*
1003 * Get the guest PD entry and calc big page.
1004 */
1005# if PGM_GST_TYPE == PGM_TYPE_32BIT
1006 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
1007 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1008 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1009# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1010 unsigned iPDSrc;
1011# if PGM_GST_TYPE == PGM_TYPE_PAE
1012 X86PDPE PdpeSrc;
1013 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1014# else /* AMD64 */
1015 PX86PML4E pPml4eSrc;
1016 X86PDPE PdpeSrc;
1017 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1018# endif
1019 GSTPDE PdeSrc;
1020
1021 if (pPDSrc)
1022 PdeSrc = pPDSrc->a[iPDSrc];
1023 else
1024 PdeSrc.u = 0;
1025# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1026
1027# if PGM_GST_TYPE == PGM_TYPE_AMD64
1028 const bool fIsBigPage = PdeSrc.b.u1Size;
1029# else
1030 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1031# endif
1032
1033# ifdef IN_RING3
1034 /*
1035 * If a CR3 Sync is pending we may ignore the invalidate page operation
1036 * depending on the kind of sync and if it's a global page or not.
1037 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1038 */
1039# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1040 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
1041 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
1042 && fIsBigPage
1043 && PdeSrc.b.u1Global
1044 )
1045 )
1046# else
1047 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1048# endif
1049 {
1050 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1051 return VINF_SUCCESS;
1052 }
1053# endif /* IN_RING3 */
1054
1055# if PGM_GST_TYPE == PGM_TYPE_AMD64
1056 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1057
1058 /* Fetch the pgm pool shadow descriptor. */
1059 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
1060 Assert(pShwPdpt);
1061
1062 /* Fetch the pgm pool shadow descriptor. */
1063 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1064 Assert(pShwPde);
1065
1066 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1067 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1068
1069 if ( !pPml4eSrc->n.u1Present
1070 || pShwPdpt->GCPhys != GCPhysPdpt)
1071 {
1072 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1073 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1074 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1075 pPml4eDst->u = 0;
1076 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1077 PGM_INVL_GUEST_TLBS();
1078 return VINF_SUCCESS;
1079 }
1080 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1081 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1082 {
1083 /*
1084 * Mark not present so we can resync the PML4E when it's used.
1085 */
1086 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1087 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1088 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1089 pPml4eDst->u = 0;
1090 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1091 PGM_INVL_GUEST_TLBS();
1092 }
1093 else if (!pPml4eSrc->n.u1Accessed)
1094 {
1095 /*
1096 * Mark not present so we can set the accessed bit.
1097 */
1098 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1099 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1100 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1101 pPml4eDst->u = 0;
1102 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1103 PGM_INVL_GUEST_TLBS();
1104 }
1105
1106 /* Check if the PDPT entry has changed. */
1107 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1108 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1109 if ( !PdpeSrc.n.u1Present
1110 || pShwPde->GCPhys != GCPhysPd)
1111 {
1112 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1113 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1114 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1115 pPdpeDst->u = 0;
1116 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1117 PGM_INVL_GUEST_TLBS();
1118 return VINF_SUCCESS;
1119 }
1120 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1121 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1122 {
1123 /*
1124 * Mark not present so we can resync the PDPTE when it's used.
1125 */
1126 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1127 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1128 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1129 pPdpeDst->u = 0;
1130 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1131 PGM_INVL_GUEST_TLBS();
1132 }
1133 else if (!PdpeSrc.lm.u1Accessed)
1134 {
1135 /*
1136 * Mark not present so we can set the accessed bit.
1137 */
1138 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1139 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1140 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1141 pPdpeDst->u = 0;
1142 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1143 PGM_INVL_GUEST_TLBS();
1144 }
1145# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1146
1147
1148 /*
1149 * Deal with the Guest PDE.
1150 */
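 /*
  * Present guest PDE: a conflict with a monitored mapping is handed to SyncPT; a
  * user/write or accessed bit mismatch simply clears the shadow PDE so it is resynced
  * on the next access; otherwise the 4 KB and 2/4 MB cases below decide whether a
  * simple page sync/skip is enough or the whole shadow page table must be flushed.
  */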
1151 rc = VINF_SUCCESS;
1152 if (PdeSrc.n.u1Present)
1153 {
1154 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1155 {
1156 /*
1157 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1158 */
1159 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1160 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1161 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1162 }
1163 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1164 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1165 {
1166 /*
1167 * Mark not present so we can resync the PDE when it's used.
1168 */
1169 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1170 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1171 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1172 pPdeDst->u = 0;
1173 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1174 PGM_INVL_GUEST_TLBS();
1175 }
1176 else if (!PdeSrc.n.u1Accessed)
1177 {
1178 /*
1179 * Mark not present so we can set the accessed bit.
1180 */
1181 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1182 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1183 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1184 pPdeDst->u = 0;
1185 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1186 PGM_INVL_GUEST_TLBS();
1187 }
1188 else if (!fIsBigPage)
1189 {
1190 /*
1191 * 4KB - page.
1192 */
1193 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1194 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1195# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1196 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1197 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1198# endif
1199 if (pShwPage->GCPhys == GCPhys)
1200 {
1201# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1202 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1203 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1204 if (pPT->a[iPTEDst].n.u1Present)
1205 {
1206# ifdef PGMPOOL_WITH_USER_TRACKING
1207 /* This is very unlikely with caching/monitoring enabled. */
1208 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1209# endif
1210 pPT->a[iPTEDst].u = 0;
1211 }
1212# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1213 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1214 if (RT_SUCCESS(rc))
1215 rc = VINF_SUCCESS;
1216# endif
1217 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1218 PGM_INVL_PG(GCPtrPage);
1219 }
1220 else
1221 {
1222 /*
1223 * The page table address changed.
1224 */
1225 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1226 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1227 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1228 pPdeDst->u = 0;
1229 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1230 PGM_INVL_GUEST_TLBS();
1231 }
1232 }
1233 else
1234 {
1235 /*
1236 * 2/4MB - page.
1237 */
1238 /* Before freeing the page, check if anything really changed. */
1239 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1240 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1241# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1242 /* Select the right PDE as we're emulating a 4MB page with two 2 MB shadow PDEs. */
1243 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1244# endif
1245 if ( pShwPage->GCPhys == GCPhys
1246 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1247 {
1248 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1249 /** @todo PAT */
1250 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1251 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1252 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1253 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1254 {
1255 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1256 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1257# if defined(IN_RC)
1258 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1259 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1260# endif
1261 return VINF_SUCCESS;
1262 }
1263 }
1264
1265 /*
1266 * Ok, the page table is present and it's been changed in the guest.
1267 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1268 * We could do this for some flushes in GC too, but we need an algorithm for
1269 * deciding which 4MB pages contain code likely to be executed very soon.
1270 */
1271 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1272 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1273 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1274 pPdeDst->u = 0;
1275 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1276 PGM_INVL_BIG_PG(GCPtrPage);
1277 }
1278 }
1279 else
1280 {
1281 /*
1282 * Page directory is not present, mark shadow PDE not present.
1283 */
1284 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1285 {
1286 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1287 pPdeDst->u = 0;
1288 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1289 PGM_INVL_PG(GCPtrPage);
1290 }
1291 else
1292 {
1293 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1294 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1295 }
1296 }
1297# if defined(IN_RC)
1298 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1299 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1300# endif
1301 return rc;
1302
1303#else /* guest real and protected mode */
1304 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1305 return VINF_SUCCESS;
1306#endif
1307}
1308
1309
1310#ifdef PGMPOOL_WITH_USER_TRACKING
1311/**
1312 * Update the tracking of shadowed pages.
1313 *
1314 * @param pVM The VM handle.
1315 * @param pShwPage The shadow page.
1316 * @param HCPhys The physical page that is being dereferenced.
1317 */
1318DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1319{
1320# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1321 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1322 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1323
1324 /** @todo If this turns out to be a bottleneck (*very* likely), two things can be done:
1325 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1326 * 2. write protect all shadowed pages. I.e. implement caching.
1327 */
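 /* For now: brute force reverse lookup, i.e. a linear scan over every page in every
    RAM range until the HCPhys is found. */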
1328 /*
1329 * Find the guest address.
1330 */
1331 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1332 pRam;
1333 pRam = pRam->CTX_SUFF(pNext))
1334 {
1335 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1336 while (iPage-- > 0)
1337 {
1338 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1339 {
1340 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1341 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1342 pShwPage->cPresent--;
1343 pPool->cPresent--;
1344 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1345 return;
1346 }
1347 }
1348 }
1349
1350 for (;;)
1351 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1352# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1353 pShwPage->cPresent--;
1354 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1355# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1356}
1357
1358
1359/**
1360 * Update the tracking of shadowed pages.
1361 *
1362 * @param pVM The VM handle.
1363 * @param pShwPage The shadow page.
1364 * @param u16 The top 16 bits of the pPage->HCPhys.
1365 * @param pPage Pointer to the guest page. This will be modified.
1366 * @param iPTDst The index into the shadow table.
1367 */
1368DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1369{
1370# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1371 /*
1372 * Just deal with the simple first time here.
1373 */
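 /* u16 is the tracking word taken from the PGMPAGE: zero means there are no shadow
    references yet, so record this shadow page as the sole owner; otherwise let the
    phys-ext code track the additional reference. */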
1374 if (!u16)
1375 {
1376 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1377 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1378 }
1379 else
1380 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1381
1382 /* write back */
1383 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1384 PGM_PAGE_SET_TRACKING(pPage, u16);
1385
1386# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1387
1388 /* update statistics. */
1389 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1390 pShwPage->cPresent++;
1391 if (pShwPage->iFirstPresent > iPTDst)
1392 pShwPage->iFirstPresent = iPTDst;
1393}
1394#endif /* PGMPOOL_WITH_USER_TRACKING */
1395
1396
1397/**
1398 * Creates a 4K shadow page for a guest page.
1399 *
1400 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1401 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1402 * will be mapped in this function.
1403 *
1404 * @param pVM VM handle.
1405 * @param pPteDst Destination page table entry.
1406 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1407 * Can safely assume that only the flags are being used.
1408 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1409 * @param pShwPage Pointer to the shadow page.
1410 * @param iPTDst The index into the shadow table.
1411 *
1412 * @remark Not used for 2/4MB pages!
1413 */
1414DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1415{
1416 if (PteSrc.n.u1Present)
1417 {
1418 /*
1419 * Find the ram range.
1420 */
1421 PPGMPAGE pPage;
1422 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1423 if (RT_SUCCESS(rc))
1424 {
1425#ifdef VBOX_WITH_NEW_PHYS_CODE
1426# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1427 /* Try make the page writable if necessary. */
1428 if ( PteSrc.n.u1Write
1429 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1430 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1431 {
1432 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1433 AssertRC(rc);
1434 }
1435# endif
1436#endif
1437
1438 /** @todo investigate PWT, PCD and PAT. */
1439 /*
1440 * Make page table entry.
1441 */
1442 SHWPTE PteDst;
1443 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1444 {
1445 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1446 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1447 {
1448#if PGM_SHW_TYPE == PGM_TYPE_EPT
1449 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1450 PteDst.n.u1Present = 1;
1451 PteDst.n.u1Execute = 1;
1452 PteDst.n.u1IgnorePAT = 1;
1453 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1454 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1455#else
1456 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1457 | PGM_PAGE_GET_HCPHYS(pPage);
1458#endif
1459 }
1460 else
1461 {
1462 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1463 PteDst.u = 0;
1464 }
1465 /** @todo count these two kinds. */
1466 }
1467 else
1468 {
1469#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1470 /*
1471 * If the page or page directory entry is not marked accessed,
1472 * we mark the page not present.
1473 */
1474 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1475 {
1476 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1477 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1478 PteDst.u = 0;
1479 }
1480 else
1481 /*
1482 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1483 * when the page is modified.
1484 */
1485 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1486 {
1487 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1488 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1489 | PGM_PAGE_GET_HCPHYS(pPage)
1490 | PGM_PTFLAGS_TRACK_DIRTY;
1491 }
1492 else
1493#endif
1494 {
1495 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1496#if PGM_SHW_TYPE == PGM_TYPE_EPT
1497 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1498 PteDst.n.u1Present = 1;
1499 PteDst.n.u1Write = 1;
1500 PteDst.n.u1Execute = 1;
1501 PteDst.n.u1IgnorePAT = 1;
1502 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1503 /* PteDst.n.u1Size = 0 */
1504#else
1505 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1506 | PGM_PAGE_GET_HCPHYS(pPage);
1507#endif
1508 }
1509 }
1510
1511#ifdef VBOX_WITH_NEW_PHYS_CODE
1512 /*
1513 * Make sure only allocated pages are mapped writable.
1514 */
1515 if ( PteDst.n.u1Write
1516 && PteDst.n.u1Present
1517 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1518 {
1519 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1520 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1521 }
1522#endif
1523
1524#ifdef PGMPOOL_WITH_USER_TRACKING
1525 /*
1526 * Keep user track up to date.
1527 */
1528 if (PteDst.n.u1Present)
1529 {
1530 if (!pPteDst->n.u1Present)
1531 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1532 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1533 {
1534 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1535 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1536 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1537 }
1538 }
1539 else if (pPteDst->n.u1Present)
1540 {
1541 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1542 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1543 }
1544#endif /* PGMPOOL_WITH_USER_TRACKING */
1545
1546 /*
1547 * Update statistics and commit the entry.
1548 */
1549#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1550 if (!PteSrc.n.u1Global)
1551 pShwPage->fSeenNonGlobal = true;
1552#endif
1553 *pPteDst = PteDst;
1554 }
1555 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1556 /** @todo count these. */
1557 }
1558 else
1559 {
1560 /*
1561 * Page not-present.
1562 */
1563 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1564#ifdef PGMPOOL_WITH_USER_TRACKING
1565 /* Keep user track up to date. */
1566 if (pPteDst->n.u1Present)
1567 {
1568 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1569 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1570 }
1571#endif /* PGMPOOL_WITH_USER_TRACKING */
1572 pPteDst->u = 0;
1573 /** @todo count these. */
1574 }
1575}
1576
1577
1578/**
1579 * Syncs a guest OS page.
1580 *
1581 * There are no conflicts at this point, neither is there any need for
1582 * page table allocations.
1583 *
1584 * @returns VBox status code.
1585 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1586 * @param pVM VM handle.
1587 * @param PdeSrc Page directory entry of the guest.
1588 * @param GCPtrPage Guest context page address.
1589 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1590 * @param uErr Fault error (X86_TRAP_PF_*).
1591 */
1592PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1593{
1594 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1595
1596#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1597 || PGM_GST_TYPE == PGM_TYPE_PAE \
1598 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1599 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1600 && PGM_SHW_TYPE != PGM_TYPE_EPT
1601
1602# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
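 /* The guest only honours the NX bit when EFER.NXE is set; cache that so the PDE checks below can ignore NX otherwise. */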
1603 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1604# endif
1605
1606 /*
1607 * Assert preconditions.
1608 */
1609 Assert(PdeSrc.n.u1Present);
1610 Assert(cPages);
1611 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1612
1613 /*
1614 * Get the shadow PDE, find the shadow page table in the pool.
1615 */
1616# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1617 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1618 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
1619
1620 /* Fetch the pgm pool shadow descriptor. */
1621 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
1622 Assert(pShwPde);
1623
1624# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1625 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1626 PPGMPOOLPAGE pShwPde;
1627 PX86PDPAE pPDDst;
1628
1629 /* Fetch the pgm pool shadow descriptor. */
1630 int rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
1631 AssertRCSuccessReturn(rc, rc);
1632 Assert(pShwPde);
1633
1634 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1635 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1636
1637# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1638 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1639 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1640 PX86PDPAE pPDDst;
1641 PX86PDPT pPdptDst;
1642
1643 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1644 AssertRCSuccessReturn(rc, rc);
1645 Assert(pPDDst && pPdptDst);
1646 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1647# endif
1648
1649 SHWPDE PdeDst = *pPdeDst;
1650 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1651 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1652
1653# if PGM_GST_TYPE == PGM_TYPE_AMD64
1654 /* Fetch the pgm pool shadow descriptor. */
1655 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1656 Assert(pShwPde);
1657# endif
1658
1659# if defined(IN_RC)
1660 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1661 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1662# endif
1663
1664 /*
1665 * Check that the page is present and that the shadow PDE isn't out of sync.
1666 */
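 /* AMD64 guests always honour the PDE PS bit; other guest modes additionally require CR4.PSE for big pages. */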
1667# if PGM_GST_TYPE == PGM_TYPE_AMD64
1668 const bool fBigPage = PdeSrc.b.u1Size;
1669# else
1670 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1671# endif
1672 RTGCPHYS GCPhys;
1673 if (!fBigPage)
1674 {
1675 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1676# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1677 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1678 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1679# endif
1680 }
1681 else
1682 {
1683 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1684# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1685 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1686 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1687# endif
1688 }
1689 if ( pShwPage->GCPhys == GCPhys
1690 && PdeSrc.n.u1Present
1691 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1692 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1693# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1694 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1695# endif
1696 )
1697 {
1698 /*
1699 * Check that the PDE is marked accessed already.
1700 * Since we set the accessed bit *before* getting here on a #PF, this
1701 * check is only meant for dealing with non-#PF'ing paths.
1702 */
1703 if (PdeSrc.n.u1Accessed)
1704 {
1705 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1706 if (!fBigPage)
1707 {
1708 /*
1709 * 4KB Page - Map the guest page table.
1710 */
1711 PGSTPT pPTSrc;
1712 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1713 if (RT_SUCCESS(rc))
1714 {
1715# ifdef PGM_SYNC_N_PAGES
1716 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1717 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1718 {
1719 /*
1720 * This code path is currently only taken when the caller is PGMTrap0eHandler
1721 * for non-present pages!
1722 *
1723 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1724 * deal with locality.
1725 */
1726 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1727# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1728 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1729 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1730# else
1731 const unsigned offPTSrc = 0;
1732# endif
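 /* Clamp the window of PGM_SYNC_NR_PAGES entries around the faulting page to the bounds of the shadow page table. */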
1733 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1734 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1735 iPTDst = 0;
1736 else
1737 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1738 for (; iPTDst < iPTDstEnd; iPTDst++)
1739 {
1740 if (!pPTDst->a[iPTDst].n.u1Present)
1741 {
1742 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1743 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1744 NOREF(GCPtrCurPage);
1745#ifndef IN_RING0
1746 /*
1747 * Assuming kernel code will be marked as supervisor (and not as user level code
1748 * executed using a conforming code selector) and as read-only.
1749 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1750 */
1751 PPGMPAGE pPage;
1752 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1753 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1754 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1755 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1756 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1757 )
1758#endif /* else: CSAM not active */
1759 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1760 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1761 GCPtrCurPage, PteSrc.n.u1Present,
1762 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1763 PteSrc.n.u1User & PdeSrc.n.u1User,
1764 (uint64_t)PteSrc.u,
1765 (uint64_t)pPTDst->a[iPTDst].u,
1766 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1767 }
1768 }
1769 }
1770 else
1771# endif /* PGM_SYNC_N_PAGES */
1772 {
1773 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1774 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1775 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1776 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1777 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1778 GCPtrPage, PteSrc.n.u1Present,
1779 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1780 PteSrc.n.u1User & PdeSrc.n.u1User,
1781 (uint64_t)PteSrc.u,
1782 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1783 }
1784 }
1785 else /* MMIO or invalid page: emulated in #PF handler. */
1786 {
1787 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1788 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1789 }
1790 }
1791 else
1792 {
1793 /*
1794 * 4/2MB page - lazy syncing shadow 4K pages.
1795 * (There are many causes of getting here; it's no longer only CSAM.)
1796 */
1797 /* Calculate the GC physical address of this 4KB shadow page. */
1798 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1799 /* Find ram range. */
1800 PPGMPAGE pPage;
1801 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1802 if (RT_SUCCESS(rc))
1803 {
1804# ifdef VBOX_WITH_NEW_PHYS_CODE
1805# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1806 /* Try make the page writable if necessary. */
1807 if ( PdeSrc.n.u1Write
1808 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1809 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1810 {
1811 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1812 AssertRC(rc);
1813 }
1814# endif
1815# endif
1816
1817 /*
1818 * Make shadow PTE entry.
1819 */
1820 SHWPTE PteDst;
1821 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1822 | PGM_PAGE_GET_HCPHYS(pPage);
1823 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1824 {
1825 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1826 PteDst.n.u1Write = 0;
1827 else
1828 PteDst.u = 0;
1829 }
1830 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1831# ifdef PGMPOOL_WITH_USER_TRACKING
1832 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1833 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1834# endif
1835# ifdef VBOX_WITH_NEW_PHYS_CODE
1836 /* Make sure only allocated pages are mapped writable. */
1837 if ( PteDst.n.u1Write
1838 && PteDst.n.u1Present
1839 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1840 {
1841 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1842 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1843 }
1844# endif
1845
1846 pPTDst->a[iPTDst] = PteDst;
1847
1848
1849 /*
1850 * If the page is not flagged as dirty and is writable, then make it read-only
1851 * at PD level, so we can set the dirty bit when the page is modified.
1852 *
1853 * ASSUMES that page access handlers are implemented on page table entry level.
1854 * Thus we will first catch the dirty access and set PDE.D and restart. If
1855 * there is an access handler, we'll trap again and let it work on the problem.
1856 */
1857 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1858 * As for invlpg, it simply frees the whole shadow PT.
1859 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1860 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1861 {
1862 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1863 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1864 PdeDst.n.u1Write = 0;
1865 }
1866 else
1867 {
1868 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1869 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1870 }
1871 *pPdeDst = PdeDst;
1872 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1873 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1874 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1875 }
1876 else
1877 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1878 }
1879# if defined(IN_RC)
1880 /* Done with pPdeDst; release the dynamic mapping lock taken above. */
1881 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1882# endif
1883 return VINF_SUCCESS;
1884 }
1885 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1886 }
1887 else
1888 {
1889 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1890 Log2(("SyncPage: Out-Of-Sync PDE at %RGv PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1891 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1892 }
1893
1894 /*
1895 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1896 * Yea, I'm lazy.
1897 */
1898 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1899 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1900
1901 pPdeDst->u = 0;
1902
1903# if defined(IN_RC)
1904 /* Done with pPdeDst; release the dynamic mapping lock taken above. */
1905 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1906# endif
1907 PGM_INVL_GUEST_TLBS();
1908 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1909
1910#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1911 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1912 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1913 && !defined(IN_RC)
1914
1915# ifdef PGM_SYNC_N_PAGES
1916 /*
1917 * Get the shadow PDE, find the shadow page table in the pool.
1918 */
1919# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1920 X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
1921
1922# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1923 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
1924
1925# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1926 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1927 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1928 PX86PDPAE pPDDst;
1929 X86PDEPAE PdeDst;
1930 PX86PDPT pPdptDst;
1931
1932 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1933 AssertRCSuccessReturn(rc, rc);
1934 Assert(pPDDst && pPdptDst);
1935 PdeDst = pPDDst->a[iPDDst];
1936# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1937 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1938 PEPTPD pPDDst;
1939 EPTPDE PdeDst;
1940
1941 int rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, NULL, &pPDDst);
1942 if (rc != VINF_SUCCESS)
1943 {
1944 AssertRC(rc);
1945 return rc;
1946 }
1947 Assert(pPDDst);
1948 PdeDst = pPDDst->a[iPDDst];
1949# endif
1950 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
1951 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1952 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1953
1954 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1955 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1956 {
1957 /*
1958 * This code path is currently only taken when the caller is PGMTrap0eHandler
1959 * for non-present pages!
1960 *
1961 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1962 * deal with locality.
1963 */
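 /* As above: clamp the PGM_SYNC_NR_PAGES window around the faulting page to the shadow page table bounds. */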
1964 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1965 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1966 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1967 iPTDst = 0;
1968 else
1969 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1970 for (; iPTDst < iPTDstEnd; iPTDst++)
1971 {
1972 if (!pPTDst->a[iPTDst].n.u1Present)
1973 {
1974 GSTPTE PteSrc;
1975
1976 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1977
1978 /* Fake the page table entry */
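 /* Real/protected mode without paging: guest physical == virtual, so fake an identity mapped, fully accessible PTE. */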
1979 PteSrc.u = GCPtrCurPage;
1980 PteSrc.n.u1Present = 1;
1981 PteSrc.n.u1Dirty = 1;
1982 PteSrc.n.u1Accessed = 1;
1983 PteSrc.n.u1Write = 1;
1984 PteSrc.n.u1User = 1;
1985
1986 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1987
1988 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1989 GCPtrCurPage, PteSrc.n.u1Present,
1990 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1991 PteSrc.n.u1User & PdeSrc.n.u1User,
1992 (uint64_t)PteSrc.u,
1993 (uint64_t)pPTDst->a[iPTDst].u,
1994 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1995 }
1996 else
1997 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
1998 }
1999 }
2000 else
2001# endif /* PGM_SYNC_N_PAGES */
2002 {
2003 GSTPTE PteSrc;
2004 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2005 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2006
2007 /* Fake the page table entry */
2008 PteSrc.u = GCPtrCurPage;
2009 PteSrc.n.u1Present = 1;
2010 PteSrc.n.u1Dirty = 1;
2011 PteSrc.n.u1Accessed = 1;
2012 PteSrc.n.u1Write = 1;
2013 PteSrc.n.u1User = 1;
2014 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2015
2016 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2017 GCPtrPage, PteSrc.n.u1Present,
2018 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2019 PteSrc.n.u1User & PdeSrc.n.u1User,
2020 (uint64_t)PteSrc.u,
2021 (uint64_t)pPTDst->a[iPTDst].u,
2022 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2023 }
2024 return VINF_SUCCESS;
2025
2026#else
2027 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2028 return VERR_INTERNAL_ERROR;
2029#endif
2030}
2031
2032
2033#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2034/**
2035 * Investigate page fault and handle write protection page faults caused by
2036 * dirty bit tracking.
2037 *
2038 * @returns VBox status code.
2039 * @param pVM VM handle.
2040 * @param uErr Page fault error code.
2041 * @param pPdeDst Shadow page directory entry.
2042 * @param pPdeSrc Guest page directory entry.
2043 * @param GCPtrPage Guest context page address.
2044 */
2045PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2046{
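 /* Cache CR0.WP, CR4.PSE, EFER.NXE and the relevant error code bits up front; they determine below whether this is a genuine guest fault or one we provoked for dirty/accessed bit tracking. */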
2047 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
2048 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2049 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2050# if PGM_GST_TYPE == PGM_TYPE_AMD64
2051 bool fBigPagesSupported = true;
2052# else
2053 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2054# endif
2055# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2056 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
2057# endif
2058 unsigned uPageFaultLevel;
2059 int rc;
2060
2061 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2062 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2063
2064# if PGM_GST_TYPE == PGM_TYPE_PAE \
2065 || PGM_GST_TYPE == PGM_TYPE_AMD64
2066
2067# if PGM_GST_TYPE == PGM_TYPE_AMD64
2068 PX86PML4E pPml4eSrc;
2069 PX86PDPE pPdpeSrc;
2070
2071 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
2072 Assert(pPml4eSrc);
2073
2074 /*
2075 * Real page fault? (PML4E level)
2076 */
2077 if ( (uErr & X86_TRAP_PF_RSVD)
2078 || !pPml4eSrc->n.u1Present
2079 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2080 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2081 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2082 )
2083 {
2084 uPageFaultLevel = 0;
2085 goto l_UpperLevelPageFault;
2086 }
2087 Assert(pPdpeSrc);
2088
2089# else /* PAE */
2090 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVM->pgm.s, GCPtrPage);
2091# endif /* PAE */
2092
2093 /*
2094 * Real page fault? (PDPE level)
2095 */
2096 if ( (uErr & X86_TRAP_PF_RSVD)
2097 || !pPdpeSrc->n.u1Present
2098# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2099 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2100 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2101 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2102# endif
2103 )
2104 {
2105 uPageFaultLevel = 1;
2106 goto l_UpperLevelPageFault;
2107 }
2108# endif
2109
2110 /*
2111 * Real page fault? (PDE level)
2112 */
2113 if ( (uErr & X86_TRAP_PF_RSVD)
2114 || !pPdeSrc->n.u1Present
2115# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2116 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2117# endif
2118 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2119 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2120 {
2121 uPageFaultLevel = 2;
2122 goto l_UpperLevelPageFault;
2123 }
2124
2125 /*
2126 * First check the easy case where the page directory has been marked read-only to track
2127 * the dirty bit of an emulated BIG page
2128 */
2129 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2130 {
2131 /* Mark guest page directory as accessed */
2132# if PGM_GST_TYPE == PGM_TYPE_AMD64
2133 pPml4eSrc->n.u1Accessed = 1;
2134 pPdpeSrc->lm.u1Accessed = 1;
2135# endif
2136 pPdeSrc->b.u1Accessed = 1;
2137
2138 /*
2139 * Only write protection page faults are relevant here.
2140 */
2141 if (fWriteFault)
2142 {
2143 /* Mark guest page directory as dirty (BIG page only). */
2144 pPdeSrc->b.u1Dirty = 1;
2145
2146 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2147 {
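 /* The shadow PDE was write-protected only to catch the first write for dirty tracking; the guest D bit is set now, so restore write access and drop the marker. */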
2148 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2149
2150 Assert(pPdeSrc->b.u1Write);
2151
2152 pPdeDst->n.u1Write = 1;
2153 pPdeDst->n.u1Accessed = 1;
2154 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2155 PGM_INVL_BIG_PG(GCPtrPage);
2156 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2157 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2158 }
2159 }
2160 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2161 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2162 }
2163 /* else: 4KB page table */
2164
2165 /*
2166 * Map the guest page table.
2167 */
2168 PGSTPT pPTSrc;
2169 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2170 if (RT_SUCCESS(rc))
2171 {
2172 /*
2173 * Real page fault?
2174 */
2175 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2176 const GSTPTE PteSrc = *pPteSrc;
2177 if ( !PteSrc.n.u1Present
2178# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2179 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2180# endif
2181 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2182 || (fUserLevelFault && !PteSrc.n.u1User)
2183 )
2184 {
2185 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2186 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2187 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2188
2189 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2190 * See the 2nd case above as well.
2191 */
2192 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2193 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2194
2195 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2196 return VINF_EM_RAW_GUEST_TRAP;
2197 }
2198 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2199
2200 /*
2201 * Set the accessed bits in the page directory and the page table.
2202 */
2203# if PGM_GST_TYPE == PGM_TYPE_AMD64
2204 pPml4eSrc->n.u1Accessed = 1;
2205 pPdpeSrc->lm.u1Accessed = 1;
2206# endif
2207 pPdeSrc->n.u1Accessed = 1;
2208 pPteSrc->n.u1Accessed = 1;
2209
2210 /*
2211 * Only write protection page faults are relevant here.
2212 */
2213 if (fWriteFault)
2214 {
2215 /* Write access, so mark guest entry as dirty. */
2216# ifdef VBOX_WITH_STATISTICS
2217 if (!pPteSrc->n.u1Dirty)
2218 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2219 else
2220 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2221# endif
2222
2223 pPteSrc->n.u1Dirty = 1;
2224
2225 if (pPdeDst->n.u1Present)
2226 {
2227#ifndef IN_RING0
2228 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2229 * Our individual shadow handlers will provide more information and force a fatal exit.
2230 */
2231 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2232 {
2233 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2234 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2235 return VINF_SUCCESS;
2236 }
2237#endif
2238 /*
2239 * Map shadow page table.
2240 */
2241 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2242 if (pShwPage)
2243 {
2244 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2245 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2246 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2247 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2248 {
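 /* Same for the 4KB case: the shadow PTE was write-protected for dirty bit tracking; make it writable again now that the guest PTE is marked dirty. */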
2249 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2250# ifdef VBOX_STRICT
2251 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2252 if (pPage)
2253 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2254 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2255# endif
2256 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2257
2258 Assert(pPteSrc->n.u1Write);
2259
2260 pPteDst->n.u1Write = 1;
2261 pPteDst->n.u1Dirty = 1;
2262 pPteDst->n.u1Accessed = 1;
2263 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2264 PGM_INVL_PG(GCPtrPage);
2265
2266 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2267 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2268 }
2269 }
2270 else
2271 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2272 }
2273 }
2274/** @todo Optimize accessed bit emulation? */
2275# ifdef VBOX_STRICT
2276 /*
2277 * Sanity check.
2278 */
2279 else if ( !pPteSrc->n.u1Dirty
2280 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2281 && pPdeDst->n.u1Present)
2282 {
2283 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2284 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2285 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2286 if ( pPteDst->n.u1Present
2287 && pPteDst->n.u1Write)
2288 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2289 }
2290# endif /* VBOX_STRICT */
2291 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2292 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2293 }
2294 AssertRC(rc);
2295 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2296 return rc;
2297
2298
2299l_UpperLevelPageFault:
2300 /*
2301 * Pagefault detected while checking the PML4E, PDPE or PDE.
2302 * Single exit handler to get rid of duplicate code paths.
2303 */
2304 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2305 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2306 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2307
2308 if (
2309# if PGM_GST_TYPE == PGM_TYPE_AMD64
2310 pPml4eSrc->n.u1Present &&
2311# endif
2312# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2313 pPdpeSrc->n.u1Present &&
2314# endif
2315 pPdeSrc->n.u1Present)
2316 {
2317 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2318 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2319 {
2320 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2321 }
2322 else
2323 {
2324 /*
2325 * Map the guest page table.
2326 */
2327 PGSTPT pPTSrc;
2328 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2329 if (RT_SUCCESS(rc))
2330 {
2331 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2332 const GSTPTE PteSrc = *pPteSrc;
2333 if (pPteSrc->n.u1Present)
2334 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2335 }
2336 AssertRC(rc);
2337 }
2338 }
2339 return VINF_EM_RAW_GUEST_TRAP;
2340}
2341#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2342
2343
2344/**
2345 * Sync a shadow page table.
2346 *
2347 * The shadow page table is not present. This includes the case where
2348 * there is a conflict with a mapping.
2349 *
2350 * @returns VBox status code.
2351 * @param pVM VM handle.
2352 * @param iPDSrc Page directory index.
2353 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2354 * Assume this is a temporary mapping.
2355 * @param GCPtrPage GC Pointer of the page that caused the fault
2356 */
2357PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2358{
2359 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2360 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPtPD[iPDSrc]);
2361 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2362
2363#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2364 || PGM_GST_TYPE == PGM_TYPE_PAE \
2365 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2366 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2367 && PGM_SHW_TYPE != PGM_TYPE_EPT
2368
2369 int rc = VINF_SUCCESS;
2370
2371 /*
2372 * Validate input a little bit.
2373 */
2374 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2375# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2376 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2377 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2378
2379 /* Fetch the pgm pool shadow descriptor. */
2380 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2381 Assert(pShwPde);
2382
2383# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2384 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2385 PPGMPOOLPAGE pShwPde;
2386 PX86PDPAE pPDDst;
2387 PSHWPDE pPdeDst;
2388
2389 /* Fetch the pgm pool shadow descriptor. */
2390 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2391 AssertRCSuccessReturn(rc, rc);
2392 Assert(pShwPde);
2393
2394 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2395 pPdeDst = &pPDDst->a[iPDDst];
2396
2397# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2398 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2399 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2400 PX86PDPAE pPDDst;
2401 PX86PDPT pPdptDst;
2402 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2403 AssertRCSuccessReturn(rc, rc);
2404 Assert(pPDDst);
2405 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2406# endif
2407 SHWPDE PdeDst = *pPdeDst;
2408
2409# if PGM_GST_TYPE == PGM_TYPE_AMD64
2410 /* Fetch the pgm pool shadow descriptor. */
2411 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2412 Assert(pShwPde);
2413# endif
2414
2415# ifndef PGM_WITHOUT_MAPPINGS
2416 /*
2417 * Check for conflicts.
2418 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2419 * HC: Simply resolve the conflict.
2420 */
2421 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2422 {
2423 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2424# ifndef IN_RING3
2425 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2426 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2427 return VERR_ADDRESS_CONFLICT;
2428# else
2429 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2430 Assert(pMapping);
2431# if PGM_GST_TYPE == PGM_TYPE_32BIT
2432 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2433# elif PGM_GST_TYPE == PGM_TYPE_PAE
2434 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2435# else
2436 AssertFailed(); /* can't happen for amd64 */
2437# endif
2438 if (RT_FAILURE(rc))
2439 {
2440 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2441 return rc;
2442 }
2443 PdeDst = *pPdeDst;
2444# endif
2445 }
2446# else /* PGM_WITHOUT_MAPPINGS */
2447 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2448# endif /* PGM_WITHOUT_MAPPINGS */
2449 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2450
2451# if defined(IN_RC)
2452 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2453 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2454# endif
2455
2456 /*
2457 * Sync page directory entry.
2458 */
2459 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2460 if (PdeSrc.n.u1Present)
2461 {
2462 /*
2463 * Allocate & map the page table.
2464 */
2465 PSHWPT pPTDst;
2466# if PGM_GST_TYPE == PGM_TYPE_AMD64
2467 const bool fPageTable = !PdeSrc.b.u1Size;
2468# else
2469 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2470# endif
2471 PPGMPOOLPAGE pShwPage;
2472 RTGCPHYS GCPhys;
2473 if (fPageTable)
2474 {
2475 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2476# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2477 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2478 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2479# endif
2480 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2481 }
2482 else
2483 {
2484 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2485# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2486 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2487 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2488# endif
2489 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2490 }
2491 if (rc == VINF_SUCCESS)
2492 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2493 else if (rc == VINF_PGM_CACHED_PAGE)
2494 {
2495 /*
2496 * The PT was cached, just hook it up.
2497 */
2498 if (fPageTable)
2499 PdeDst.u = pShwPage->Core.Key
2500 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2501 else
2502 {
2503 PdeDst.u = pShwPage->Core.Key
2504 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2505 /* (see explanation and assumptions further down.) */
2506 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2507 {
2508 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2509 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2510 PdeDst.b.u1Write = 0;
2511 }
2512 }
2513 *pPdeDst = PdeDst;
2514# if defined(IN_RC)
2515 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2516# endif
2517 return VINF_SUCCESS;
2518 }
2519 else if (rc == VERR_PGM_POOL_FLUSHED)
2520 {
2521 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
2522# if defined(IN_RC)
2523 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2524# endif
2525 return VINF_PGM_SYNC_CR3;
2526 }
2527 else
2528 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2529 PdeDst.u &= X86_PDE_AVL_MASK;
2530 PdeDst.u |= pShwPage->Core.Key;
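 /* Point the local shadow PDE copy at the new page table; the attribute bits are merged in from the guest PDE below before *pPdeDst is written back. */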
2531
2532 /*
2533 * Page directory has been accessed (this is a fault situation, remember).
2534 */
2535 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2536 if (fPageTable)
2537 {
2538 /*
2539 * Page table - 4KB.
2540 *
2541 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2542 */
2543 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2544 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2545 PGSTPT pPTSrc;
2546 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2547 if (RT_SUCCESS(rc))
2548 {
2549 /*
2550 * Start by syncing the page directory entry so CSAM's TLB trick works.
2551 */
2552 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2553 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2554 *pPdeDst = PdeDst;
2555# if defined(IN_RC)
2556 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2557# endif
2558
2559 /*
2560 * Directory/page user or supervisor privilege: (same goes for read/write)
2561 *
2562 * Directory Page Combined
2563 * U/S U/S U/S
2564 * 0 0 0
2565 * 0 1 0
2566 * 1 0 0
2567 * 1 1 1
2568 *
2569 * Simple AND operation. Table listed for completeness.
2570 *
2571 */
2572 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2573# ifdef PGM_SYNC_N_PAGES
2574 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2575 unsigned iPTDst = iPTBase;
2576 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2577 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2578 iPTDst = 0;
2579 else
2580 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2581# else /* !PGM_SYNC_N_PAGES */
2582 unsigned iPTDst = 0;
2583 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2584# endif /* !PGM_SYNC_N_PAGES */
2585# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2586 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2587 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2588# else
2589 const unsigned offPTSrc = 0;
2590# endif
2591 for (; iPTDst < iPTDstEnd; iPTDst++)
2592 {
2593 const unsigned iPTSrc = iPTDst + offPTSrc;
2594 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2595
2596 if (PteSrc.n.u1Present) /* we've already cleared it above */
2597 {
2598# ifndef IN_RING0
2599 /*
2600 * Assuming kernel code will be marked as supervisor (and not as user level code
2601 * executed using a conforming code selector) and as read-only.
2602 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2603 */
2604 PPGMPAGE pPage;
2605 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2606 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2607 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2608 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2609 )
2610# endif
2611 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2612 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2613 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2614 PteSrc.n.u1Present,
2615 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2616 PteSrc.n.u1User & PdeSrc.n.u1User,
2617 (uint64_t)PteSrc.u,
2618 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2619 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2620 }
2621 } /* for PTEs */
2622 }
2623 }
2624 else
2625 {
2626 /*
2627 * Big page - 2/4MB.
2628 *
2629 * We'll walk the ram range list in parallel and optimize lookups.
2630 * We will only sync one shadow page table at a time.
2631 */
2632 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2633
2634 /**
2635 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2636 */
2637
2638 /*
2639 * Start by syncing the page directory entry.
2640 */
2641 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2642 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2643
2644 /*
2645 * If the page is not flagged as dirty and is writable, then make it read-only
2646 * at PD level, so we can set the dirty bit when the page is modified.
2647 *
2648 * ASSUMES that page access handlers are implemented on page table entry level.
2649 * Thus we will first catch the dirty access and set PDE.D and restart. If
2650 * there is an access handler, we'll trap again and let it work on the problem.
2651 */
2652 /** @todo move the above stuff to a section in the PGM documentation. */
2653 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2654 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2655 {
2656 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2657 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2658 PdeDst.b.u1Write = 0;
2659 }
2660 *pPdeDst = PdeDst;
2661# if defined(IN_RC)
2662 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2663# endif
2664
2665 /*
2666 * Fill the shadow page table.
2667 */
2668 /* Get address and flags from the source PDE. */
2669 SHWPTE PteDstBase;
2670 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
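 /* PteDstBase is the attribute template inherited from the big-page PDE; each shadow PTE below is this template plus the host physical address of the page. */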
2671
2672 /* Loop thru the entries in the shadow PT. */
2673 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2674 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2675 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2676 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2677 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2678 unsigned iPTDst = 0;
2679 while (iPTDst < RT_ELEMENTS(pPTDst->a))
2680 {
2681 /* Advance ram range list. */
2682 while (pRam && GCPhys > pRam->GCPhysLast)
2683 pRam = pRam->CTX_SUFF(pNext);
2684 if (pRam && GCPhys >= pRam->GCPhys)
2685 {
2686 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2687 do
2688 {
2689 /* Make shadow PTE. */
2690 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2691 SHWPTE PteDst;
2692
2693# ifdef VBOX_WITH_NEW_PHYS_CODE
2694# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2695 /* Try make the page writable if necessary. */
2696 if ( PteDstBase.n.u1Write
2697 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2698 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2699 {
2700 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2701 AssertRCReturn(rc, rc);
2702 }
2703# endif
2704# else /* !VBOX_WITH_NEW_PHYS_CODE */
2705 /* Make sure the RAM has already been allocated. */
2706 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2707 {
2708 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2709 {
2710# ifdef IN_RING3
2711 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2712# else
2713 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2714# endif
2715 if (rc != VINF_SUCCESS)
2716 return rc;
2717 }
2718 }
2719# endif /* !VBOX_WITH_NEW_PHYS_CODE */
2720
2721 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2722 {
2723 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2724 {
2725 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2726 PteDst.n.u1Write = 0;
2727 }
2728 else
2729 PteDst.u = 0;
2730 }
2731# ifndef IN_RING0
2732 /*
2733 * Assuming kernel code will be marked as supervisor and not as user level and executed
2734 * using a conforming code selector. Don't check for readonly, as that implies the whole
2735 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2736 */
2737 else if ( !PdeSrc.n.u1User
2738 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2739 PteDst.u = 0;
2740# endif
2741 else
2742 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2743
2744# ifdef VBOX_WITH_NEW_PHYS_CODE
2745 /* Only map writable pages writable. */
2746 if ( PteDst.n.u1Write
2747 && PteDst.n.u1Present
2748 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2749 {
2750 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2751 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2752 }
2753# endif
2754
2755# ifdef PGMPOOL_WITH_USER_TRACKING
2756 if (PteDst.n.u1Present)
2757 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2758# endif
2759 /* commit it */
2760 pPTDst->a[iPTDst] = PteDst;
2761 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2762 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2763 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2764
2765 /* advance */
2766 GCPhys += PAGE_SIZE;
2767 iHCPage++;
2768 iPTDst++;
2769 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2770 && GCPhys <= pRam->GCPhysLast);
2771 }
2772 else if (pRam)
2773 {
2774 Log(("Invalid pages at %RGp\n", GCPhys));
2775 do
2776 {
2777 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2778 GCPhys += PAGE_SIZE;
2779 iPTDst++;
2780 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2781 && GCPhys < pRam->GCPhys);
2782 }
2783 else
2784 {
2785 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2786 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2787 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2788 }
2789 } /* while more PTEs */
2790 } /* 4KB / 4MB */
2791 }
2792 else
2793 AssertRelease(!PdeDst.n.u1Present);
2794
2795 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2796 if (RT_FAILURE(rc))
2797 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2798 return rc;
2799
2800#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2801 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2802 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2803 && !defined(IN_RC)
2804
2805 /*
2806 * Validate input a little bit.
2807 */
2808 int rc = VINF_SUCCESS;
2809# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2810 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2811 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2812
2813 /* Fetch the pgm pool shadow descriptor. */
2814 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2815 Assert(pShwPde);
2816
2817# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2818 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2819 PPGMPOOLPAGE pShwPde;
2820 PX86PDPAE pPDDst;
2821 PSHWPDE pPdeDst;
2822
2823 /* Fetch the pgm pool shadow descriptor. */
2824 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2825 AssertRCSuccessReturn(rc, rc);
2826 Assert(pShwPde);
2827
2828 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2829 pPdeDst = &pPDDst->a[iPDDst];
2830
2831# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2832 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2833 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2834 PX86PDPAE pPDDst;
2835 PX86PDPT pPdptDst;
2836 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2837 AssertRCSuccessReturn(rc, rc);
2838 Assert(pPDDst);
2839 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2840
2841 /* Fetch the pgm pool shadow descriptor. */
2842 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2843 Assert(pShwPde);
2844
2845# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2846 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2847 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2848 PEPTPD pPDDst;
2849 PEPTPDPT pPdptDst;
2850
2851 rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2852 if (rc != VINF_SUCCESS)
2853 {
2854 AssertRC(rc);
2855 return rc;
2856 }
2857 Assert(pPDDst);
2858 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2859
2860 /* Fetch the pgm pool shadow descriptor. */
2861 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2862 Assert(pShwPde);
2863# endif
2864 SHWPDE PdeDst = *pPdeDst;
2865
2866 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2867 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2868
2869 GSTPDE PdeSrc;
2870 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2871 PdeSrc.n.u1Present = 1;
2872 PdeSrc.n.u1Write = 1;
2873 PdeSrc.n.u1Accessed = 1;
2874 PdeSrc.n.u1User = 1;
2875
2876 /*
2877 * Allocate & map the page table.
2878 */
2879 PSHWPT pPTDst;
2880 PPGMPOOLPAGE pShwPage;
2881 RTGCPHYS GCPhys;
2882
2883 /* Virtual address = physical address */
2884 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2885 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
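 /* The pool is keyed on the guest physical address rounded down to the start of the range covered by one shadow page table. */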
2886
2887 if ( rc == VINF_SUCCESS
2888 || rc == VINF_PGM_CACHED_PAGE)
2889 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2890 else
2891 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2892
2893 PdeDst.u &= X86_PDE_AVL_MASK;
2894 PdeDst.u |= pShwPage->Core.Key;
2895 PdeDst.n.u1Present = 1;
2896 PdeDst.n.u1Write = 1;
2897# if PGM_SHW_TYPE == PGM_TYPE_EPT
2898 PdeDst.n.u1Execute = 1;
2899# else
2900 PdeDst.n.u1User = 1;
2901 PdeDst.n.u1Accessed = 1;
2902# endif
2903 *pPdeDst = PdeDst;
2904
2905 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2906 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2907 return rc;
2908
2909#else
2910 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2911 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2912 return VERR_INTERNAL_ERROR;
2913#endif
2914}
2915
2916
2917
2918/**
2919 * Prefetch a page/set of pages.
2920 *
2921 * Typically used to sync commonly used pages before entering raw mode
2922 * after a CR3 reload.
2923 *
2924 * @returns VBox status code.
2925 * @param pVM VM handle.
2926 * @param GCPtrPage Page to prefetch.
2927 */
2928PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage)
2929{
2930#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2931 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
2932 /*
2933 * Check that all Guest levels thru the PDE are present, getting the
2934 * PD and PDE in the process.
2935 */
2936 int rc = VINF_SUCCESS;
2937# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2938# if PGM_GST_TYPE == PGM_TYPE_32BIT
2939 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
2940 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
2941# elif PGM_GST_TYPE == PGM_TYPE_PAE
2942 unsigned iPDSrc;
2943 X86PDPE PdpeSrc;
2944 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
2945 if (!pPDSrc)
2946 return VINF_SUCCESS; /* not present */
2947# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2948 unsigned iPDSrc;
2949 PX86PML4E pPml4eSrc;
2950 X86PDPE PdpeSrc;
2951 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2952 if (!pPDSrc)
2953 return VINF_SUCCESS; /* not present */
2954# endif
2955 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2956# else
2957 PGSTPD pPDSrc = NULL;
2958 const unsigned iPDSrc = 0;
2959 GSTPDE PdeSrc;
2960
2961 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2962 PdeSrc.n.u1Present = 1;
2963 PdeSrc.n.u1Write = 1;
2964 PdeSrc.n.u1Accessed = 1;
2965 PdeSrc.n.u1User = 1;
2966# endif
2967
2968 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2969 {
2970# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2971 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
2972# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2973 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2974 PX86PDPAE pPDDst;
2975 X86PDEPAE PdeDst;
2976# if PGM_GST_TYPE != PGM_TYPE_PAE
2977 X86PDPE PdpeSrc;
2978
2979 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
2980 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
2981# endif
2982 int rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
2983 if (rc != VINF_SUCCESS)
2984 {
2985 AssertRC(rc);
2986 return rc;
2987 }
2988 Assert(pPDDst);
2989 PdeDst = pPDDst->a[iPDDst];
2990
2991# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2992 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2993 PX86PDPAE pPDDst;
2994 X86PDEPAE PdeDst;
2995
2996# if PGM_GST_TYPE == PGM_TYPE_PROT
2997 /* AMD-V nested paging */
2998 X86PML4E Pml4eSrc;
2999 X86PDPE PdpeSrc;
3000 PX86PML4E pPml4eSrc = &Pml4eSrc;
3001
3002 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3003 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3004 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3005# endif
3006
3007 int rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3008 if (rc != VINF_SUCCESS)
3009 {
3010 AssertRC(rc);
3011 return rc;
3012 }
3013 Assert(pPDDst);
3014 PdeDst = pPDDst->a[iPDDst];
3015# endif
3016 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3017 {
3018 if (!PdeDst.n.u1Present)
3019 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3020 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3021 else
3022 {
3023 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3024 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3025 * makes no sense to prefetch more than one page.
3026 */
3027 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3028 if (RT_SUCCESS(rc))
3029 rc = VINF_SUCCESS;
3030 }
3031 }
3032 }
3033 return rc;
3034
3035#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3036 return VINF_SUCCESS; /* ignore */
3037#endif
3038}
3039
3040
3041
3042
3043/**
3044 * Syncs a page during a PGMVerifyAccess() call.
3045 *
3046 * @returns VBox status code (informational included).
3047 * @param pVM VM handle.
 * @param GCPtrPage The address of the page to sync.
3048 * @param fPage The effective guest page flags.
3049 * @param uErr The trap error code.
3050 */
3051PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3052{
3053 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3054
3055 Assert(!HWACCMIsNestedPagingActive(pVM));
3056#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3057 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3058
3059# ifndef IN_RING0
3060 if (!(fPage & X86_PTE_US))
3061 {
3062 /*
3063 * Mark this page as safe.
3064 */
3065 /** @todo not correct for pages that contain both code and data!! */
3066 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3067 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3068 }
3069# endif
3070
3071 /*
3072 * Get guest PD and index.
3073 */
3074# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3075# if PGM_GST_TYPE == PGM_TYPE_32BIT
3076 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3077 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3078# elif PGM_GST_TYPE == PGM_TYPE_PAE
3079 unsigned iPDSrc;
3080 X86PDPE PdpeSrc;
3081 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3082
3083 if (!pPDSrc)
3084 {
3085 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3086 return VINF_EM_RAW_GUEST_TRAP;
3087 }
3088# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3089 unsigned iPDSrc;
3090 PX86PML4E pPml4eSrc;
3091 X86PDPE PdpeSrc;
3092 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3093 if (!pPDSrc)
3094 {
3095 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3096 return VINF_EM_RAW_GUEST_TRAP;
3097 }
3098# endif
3099# else
3100 PGSTPD pPDSrc = NULL;
3101 const unsigned iPDSrc = 0;
3102# endif
3103 int rc = VINF_SUCCESS;
3104
3105 /*
3106 * First check if the shadow pd is present.
3107 */
3108# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3109 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
3110# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3111 PX86PDEPAE pPdeDst;
3112 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3113 PX86PDPAE pPDDst;
3114# if PGM_GST_TYPE != PGM_TYPE_PAE
3115 X86PDPE PdpeSrc;
3116
3117 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3118 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3119# endif
3120 rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3121 if (rc != VINF_SUCCESS)
3122 {
3123 AssertRC(rc);
3124 return rc;
3125 }
3126 Assert(pPDDst);
3127 pPdeDst = &pPDDst->a[iPDDst];
3128
3129# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3130 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3131 PX86PDPAE pPDDst;
3132 PX86PDEPAE pPdeDst;
3133
3134# if PGM_GST_TYPE == PGM_TYPE_PROT
3135 /* AMD-V nested paging */
3136 X86PML4E Pml4eSrc;
3137 X86PDPE PdpeSrc;
3138 PX86PML4E pPml4eSrc = &Pml4eSrc;
3139
3140 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3141 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3142 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3143# endif
3144
3145 rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3146 if (rc != VINF_SUCCESS)
3147 {
3148 AssertRC(rc);
3149 return rc;
3150 }
3151 Assert(pPDDst);
3152 pPdeDst = &pPDDst->a[iPDDst];
3153# endif
3154
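    /* If the shadow PDE is not present yet, have SyncPT instantiate the page
       table before running the dirty-bit / out-of-sync checks further down. */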
3155 if (!pPdeDst->n.u1Present)
3156 {
3157# if defined(IN_RC)
3158 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3159 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3160# endif
3161 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3162# if defined(IN_RC)
3163 /* Done with SyncPT; release the lock on the dynamic pPdeDst mapping again. */
3164 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3165# endif
3166 AssertRC(rc);
3167 if (rc != VINF_SUCCESS)
3168 return rc;
3169 }
3170
3171# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3172 /* Check for dirty bit fault */
3173 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3174 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3175 Log(("PGMVerifyAccess: success (dirty)\n"));
3176 else
3177 {
3178 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3179#else
3180 {
3181 GSTPDE PdeSrc;
3182 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3183 PdeSrc.n.u1Present = 1;
3184 PdeSrc.n.u1Write = 1;
3185 PdeSrc.n.u1Accessed = 1;
3186 PdeSrc.n.u1User = 1;
3187
3188#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3189 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3190 if (uErr & X86_TRAP_PF_US)
3191 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3192 else /* supervisor */
3193 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3194
3195 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3196 if (RT_SUCCESS(rc))
3197 {
3198 /* Page was successfully synced */
3199 Log2(("PGMVerifyAccess: success (sync)\n"));
3200 rc = VINF_SUCCESS;
3201 }
3202 else
3203 {
3204 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3205 return VINF_EM_RAW_GUEST_TRAP;
3206 }
3207 }
3208 return rc;
3209
3210#else /* PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT */
3211
3212 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3213 return VERR_INTERNAL_ERROR;
3214#endif /* PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT */
3215}
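
/*
 * Illustrative sketch (hypothetical fragment): a PGMVerifyAccess()-style
 * caller is assumed to fetch the effective guest page flags first and then
 * hand them, together with a synthesized page-fault error code, to the worker
 * above via the mode dispatch table. The uErr composition and the fWriteAccess
 * flag below are assumptions, not taken from this file.
 */
#if 0 /* sketch only */
uint64_t fPageGst = 0;
int rc = PGMGstGetPage(pVM, GCPtrPage, &fPageGst, NULL /* pGCPhys */);
if (RT_SUCCESS(rc))
{
    unsigned uErr = fWriteAccess ? X86_TRAP_PF_RW : 0;
    rc = PGM_BTH_PFN(VerifyAccessSyncPage, pVM)(pVM, GCPtrPage, (unsigned)fPageGst, uErr);
}
#endif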
3216
3217
3218#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3219# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3220/**
3221 * Figures out which kind of shadow page this guest PDE warrants.
3222 *
3223 * @returns Shadow page kind.
3224 * @param pPdeSrc The guest PDE in question.
3225 * @param cr4 The current guest cr4 value.
3226 */
3227DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3228{
3229# if PGM_GST_TYPE == PGM_TYPE_AMD64
3230 if (!pPdeSrc->n.u1Size)
3231# else
3232 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3233# endif
3234 return BTH_PGMPOOLKIND_PT_FOR_PT;
3235 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3236 //{
3237 // case 0:
3238 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3239 // case X86_PDE4M_RW:
3240 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3241 // case X86_PDE4M_US:
3242 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3243 // case X86_PDE4M_RW | X86_PDE4M_US:
3244 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3245# if 0
3246 // case X86_PDE4M_PAE_NX:
3247 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3248 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3249 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3250 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3251 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3252 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3253 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3254# endif
3255 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3256 //}
3257}
3258# endif
3259#endif
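
/*
 * Illustrative sketch: the kind computed by CalcPageKind() above is meant to
 * feed the shadow page pool allocator when a page table is instantiated for a
 * guest PDE. Hypothetical fragment, reusing the pgmPoolAlloc() signature seen
 * in MapCR3() further down; the iUser/iUserTable values are illustrative only.
 */
#if 0 /* sketch only, 4KB page table case; big pages would use GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) */
PGMPOOLKIND const enmKind = PGM_BTH_NAME(CalcPageKind)(&PdeSrc, CPUMGetGuestCR4(pVM));
PPGMPOOLPAGE pShwPage;
int rc = pgmPoolAlloc(pVM, PdeSrc.u & GST_PDE_PG_MASK, enmKind,
                      SHW_POOL_ROOT_IDX /* iUser, illustrative */,
                      iPDDst /* iUserTable, illustrative */,
                      &pShwPage);
#endif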
3260
3261#undef MY_STAM_COUNTER_INC
3262#define MY_STAM_COUNTER_INC(a) do { } while (0)
3263
3264
3265/**
3266 * Syncs the paging hierarchy starting at CR3.
3267 *
3268 * @returns VBox status code, no specials.
3269 * @param pVM The virtual machine.
3270 * @param cr0 Guest context CR0 register
3271 * @param cr3 Guest context CR3 register
3272 * @param cr4 Guest context CR4 register
3273 * @param fGlobal Including global page directories or not
3274 */
3275PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3276{
3277 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3278 fGlobal = true; /* Change this CR3 reload to be a global one. */
3279
3280 LogFlow(("SyncCR3 %d\n", fGlobal));
3281
3282#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3283 /*
3284 * Update page access handlers.
3285 * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3286 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3287 * have to look into that later because it will have a bad influence on performance.
3288 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3289 * bird: Yes, but that won't work for aliases.
3290 */
3291 /** @todo this MUST go away. See #1557. */
3292 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3293 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3294 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3295#endif
3296
3297#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3298 /*
3299 * Nested / EPT - almost no work.
3300 */
3301 /** @todo check if this is really necessary; the call does it as well... */
3302 HWACCMFlushTLB(pVM);
3303 return VINF_SUCCESS;
3304
3305#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3306 /*
3307 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3308 * out the shadow parts when the guest modifies its tables.
3309 */
3310 return VINF_SUCCESS;
3311
3312#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3313
3314# ifdef PGM_WITHOUT_MAPPINGS
3315 Assert(pVM->pgm.s.fMappingsFixed);
3316 return VINF_SUCCESS;
3317# else
3318 /* Nothing to do when mappings are fixed. */
3319 if (pVM->pgm.s.fMappingsFixed)
3320 return VINF_SUCCESS;
3321
3322 int rc = PGMMapResolveConflicts(pVM);
3323 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3324 if (rc == VINF_PGM_SYNC_CR3)
3325 {
3326 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3327 return VINF_PGM_SYNC_CR3;
3328 }
3329# endif
3330 return VINF_SUCCESS;
3331#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3332}
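
/*
 * Illustrative sketch, assuming the public PGMSyncCR3() wrapper: code that
 * gets VINF_PGM_SYNC_CR3 back, or sees the VM_FF_PGM_SYNC_CR3 force-action
 * flags set, is expected to re-run the sync before resuming guest execution,
 * roughly as follows.
 */
#if 0 /* sketch only */
if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL))
{
    int rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM),
                        VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3) /* fGlobal */);
    AssertRC(rc);
}
#endif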
3333
3334
3335
3336
3337#ifdef VBOX_STRICT
3338#ifdef IN_RC
3339# undef AssertMsgFailed
3340# define AssertMsgFailed Log
3341#endif
3342#ifdef IN_RING3
3343# include <VBox/dbgf.h>
3344
3345/**
3346 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3347 *
3348 * @returns VBox status code (VINF_SUCCESS).
3349 * @param pVM The VM handle.
3350 * @param cr3 The root of the hierarchy.
3351 * @param cr4 The cr4 register value; only the PAE and PSE flags are currently used.
3352 * @param fLongMode Set if long mode, false if not long mode.
3353 * @param cMaxDepth Number of levels to dump.
3354 * @param pHlp Pointer to the output functions.
3355 */
3356__BEGIN_DECLS
3357VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3358__END_DECLS
3359
3360#endif
3361
3362/**
3363 * Checks that the shadow page table is in sync with the guest one.
3364 *
3365 * @returns The number of errors.
3366 * @param pVM The virtual machine.
3367 * @param cr3 Guest context CR3 register
3368 * @param cr4 Guest context CR4 register
3369 * @param GCPtr Where to start. Defaults to 0.
3370 * @param cb How much to check. Defaults to everything.
3371 */
3372PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3373{
3374#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3375 return 0;
3376#else
3377 unsigned cErrors = 0;
3378
3379#if PGM_GST_TYPE == PGM_TYPE_PAE
3380 /** @todo currently broken; crashes below somewhere */
3381 AssertFailed();
3382#endif
3383
3384#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3385 || PGM_GST_TYPE == PGM_TYPE_PAE \
3386 || PGM_GST_TYPE == PGM_TYPE_AMD64
3387
3388# if PGM_GST_TYPE == PGM_TYPE_AMD64
3389 bool fBigPagesSupported = true;
3390# else
3391 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3392# endif
3393 PPGM pPGM = &pVM->pgm.s;
3394 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3395 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3396# ifndef IN_RING0
3397 RTHCPHYS HCPhys; /* general usage. */
3398# endif
3399 int rc;
3400
3401 /*
3402 * Check that the Guest CR3 and all its mappings are correct.
3403 */
3404 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3405 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3406 false);
3407# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3408# if PGM_GST_TYPE == PGM_TYPE_32BIT
3409 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3410# else
3411 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3412# endif
3413 AssertRCReturn(rc, 1);
3414 HCPhys = NIL_RTHCPHYS;
3415 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3416 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3417# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3418 RTGCPHYS GCPhys;
3419 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3420 AssertRCReturn(rc, 1);
3421 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3422# endif
3423# endif /* !IN_RING0 */
3424
3425 /*
3426 * Get and check the Shadow CR3.
3427 */
3428# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3429 unsigned cPDEs = X86_PG_ENTRIES;
3430 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3431# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3432# if PGM_GST_TYPE == PGM_TYPE_32BIT
3433 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3434# else
3435 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3436# endif
3437 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3438# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3439 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3440 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3441# endif
3442 if (cb != ~(RTGCPTR)0)
3443 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3444
3445/** @todo call the other two PGMAssert*() functions. */
3446
3447# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3448 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3449# endif
3450
3451# if PGM_GST_TYPE == PGM_TYPE_AMD64
3452 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3453
3454 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3455 {
3456 PPGMPOOLPAGE pShwPdpt = NULL;
3457 PX86PML4E pPml4eSrc;
3458 PX86PML4E pPml4eDst;
3459 RTGCPHYS GCPhysPdptSrc;
3460
3461 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3462 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3463
3464 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3465 if (!pPml4eDst->n.u1Present)
3466 {
3467 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3468 continue;
3469 }
3470
3471 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3472 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3473
3474 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3475 {
3476 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3477 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3478 cErrors++;
3479 continue;
3480 }
3481
3482 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3483 {
3484 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3485 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3486 cErrors++;
3487 continue;
3488 }
3489
3490 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3491 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3492 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3493 {
3494 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3495 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3496 cErrors++;
3497 continue;
3498 }
3499# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3500 {
3501# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3502
3503# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3504 /*
3505 * Check the PDPTEs too.
3506 */
3507 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3508
3509 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3510 {
3511 unsigned iPDSrc;
3512 PPGMPOOLPAGE pShwPde = NULL;
3513 PX86PDPE pPdpeDst;
3514 RTGCPHYS GCPhysPdeSrc;
3515# if PGM_GST_TYPE == PGM_TYPE_PAE
3516 X86PDPE PdpeSrc;
3517 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3518 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3519# else
3520 PX86PML4E pPml4eSrc;
3521 X86PDPE PdpeSrc;
3522 PX86PDPT pPdptDst;
3523 PX86PDPAE pPDDst;
3524 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3525
3526 rc = pgmShwGetLongModePDPtr(pVM, GCPtr, NULL, &pPdptDst, &pPDDst);
3527 if (rc != VINF_SUCCESS)
3528 {
3529 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3530 GCPtr += 512 * _2M;
3531 continue; /* next PDPTE */
3532 }
3533 Assert(pPDDst);
3534# endif
3535 Assert(iPDSrc == 0);
3536
3537 pPdpeDst = &pPdptDst->a[iPdpt];
3538
3539 if (!pPdpeDst->n.u1Present)
3540 {
3541 GCPtr += 512 * _2M;
3542 continue; /* next PDPTE */
3543 }
3544
3545 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3546 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3547
3548 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3549 {
3550 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3551 GCPtr += 512 * _2M;
3552 cErrors++;
3553 continue;
3554 }
3555
3556 if (GCPhysPdeSrc != pShwPde->GCPhys)
3557 {
3558# if PGM_GST_TYPE == PGM_TYPE_AMD64
3559 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3560# else
3561 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3562# endif
3563 GCPtr += 512 * _2M;
3564 cErrors++;
3565 continue;
3566 }
3567
3568# if PGM_GST_TYPE == PGM_TYPE_AMD64
3569 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3570 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3571 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3572 {
3573 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3574 GCPtr += 512 * _2M;
3575 cErrors++;
3576 continue;
3577 }
3578# endif
3579
3580# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3581 {
3582# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3583# if PGM_GST_TYPE == PGM_TYPE_32BIT
3584 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3585# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3586 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
3587# endif
3588# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3589 /*
3590 * Iterate the shadow page directory.
3591 */
3592 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3593 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3594
3595 for (;
3596 iPDDst < cPDEs;
3597 iPDDst++, GCPtr += cIncrement)
3598 {
3599# if PGM_SHW_TYPE == PGM_TYPE_PAE
3600 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3601# else
3602 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3603# endif
3604 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3605 {
3606 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3607 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3608 {
3609 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3610 cErrors++;
3611 continue;
3612 }
3613 }
3614 else if ( (PdeDst.u & X86_PDE_P)
3615 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3616 )
3617 {
3618 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3619 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3620 if (!pPoolPage)
3621 {
3622 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3623 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3624 cErrors++;
3625 continue;
3626 }
3627 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3628
3629 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3630 {
3631 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3632 GCPtr, (uint64_t)PdeDst.u));
3633 cErrors++;
3634 }
3635
3636 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3637 {
3638 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3639 GCPtr, (uint64_t)PdeDst.u));
3640 cErrors++;
3641 }
3642
3643 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3644 if (!PdeSrc.n.u1Present)
3645 {
3646 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3647 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3648 cErrors++;
3649 continue;
3650 }
3651
3652 if ( !PdeSrc.b.u1Size
3653 || !fBigPagesSupported)
3654 {
3655 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3656# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3657 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3658# endif
3659 }
3660 else
3661 {
3662# if PGM_GST_TYPE == PGM_TYPE_32BIT
3663 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3664 {
3665 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3666 GCPtr, (uint64_t)PdeSrc.u));
3667 cErrors++;
3668 continue;
3669 }
3670# endif
3671 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3672# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3673 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3674# endif
3675 }
3676
3677 if ( pPoolPage->enmKind
3678 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3679 {
3680 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3681 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3682 cErrors++;
3683 }
3684
3685 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3686 if (!pPhysPage)
3687 {
3688 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3689 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3690 cErrors++;
3691 continue;
3692 }
3693
3694 if (GCPhysGst != pPoolPage->GCPhys)
3695 {
3696 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3697 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3698 cErrors++;
3699 continue;
3700 }
3701
3702 if ( !PdeSrc.b.u1Size
3703 || !fBigPagesSupported)
3704 {
3705 /*
3706 * Page Table.
3707 */
3708 const GSTPT *pPTSrc;
3709 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3710 if (RT_FAILURE(rc))
3711 {
3712 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3713 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3714 cErrors++;
3715 continue;
3716 }
3717 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3718 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3719 {
3720 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3721 // (This problem will go away when/if we shadow multiple CR3s.)
3722 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3723 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3724 cErrors++;
3725 continue;
3726 }
3727 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3728 {
3729 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3730 GCPtr, (uint64_t)PdeDst.u));
3731 cErrors++;
3732 continue;
3733 }
3734
3735 /* iterate the page table. */
3736# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3737 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3738 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3739# else
3740 const unsigned offPTSrc = 0;
3741# endif
3742 for (unsigned iPT = 0, off = 0;
3743 iPT < RT_ELEMENTS(pPTDst->a);
3744 iPT++, off += PAGE_SIZE)
3745 {
3746 const SHWPTE PteDst = pPTDst->a[iPT];
3747
3748 /* skip not-present entries. */
3749 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3750 continue;
3751 Assert(PteDst.n.u1Present);
3752
3753 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3754 if (!PteSrc.n.u1Present)
3755 {
3756# ifdef IN_RING3
3757 PGMAssertHandlerAndFlagsInSync(pVM);
3758 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3759# endif
3760 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3761 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3762 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3763 cErrors++;
3764 continue;
3765 }
3766
3767 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3768# if 1 /** @todo sync accessed bit properly... */
3769 fIgnoreFlags |= X86_PTE_A;
3770# endif
3771
3772 /* match the physical addresses */
3773 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3774 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3775
3776# ifdef IN_RING3
3777 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3778 if (RT_FAILURE(rc))
3779 {
3780 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3781 {
3782 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3783 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3784 cErrors++;
3785 continue;
3786 }
3787 }
3788 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3789 {
3790 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3791 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3792 cErrors++;
3793 continue;
3794 }
3795# endif
3796
3797 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3798 if (!pPhysPage)
3799 {
3800# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3801 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3802 {
3803 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3804 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3805 cErrors++;
3806 continue;
3807 }
3808# endif
3809 if (PteDst.n.u1Write)
3810 {
3811 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3812 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3813 cErrors++;
3814 }
3815 fIgnoreFlags |= X86_PTE_RW;
3816 }
3817 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3818 {
3819 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3820 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3821 cErrors++;
3822 continue;
3823 }
3824
3825 /* flags */
3826 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3827 {
3828 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3829 {
3830 if (PteDst.n.u1Write)
3831 {
3832 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3833 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3834 cErrors++;
3835 continue;
3836 }
3837 fIgnoreFlags |= X86_PTE_RW;
3838 }
3839 else
3840 {
3841 if (PteDst.n.u1Present)
3842 {
3843 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3844 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3845 cErrors++;
3846 continue;
3847 }
3848 fIgnoreFlags |= X86_PTE_P;
3849 }
3850 }
3851 else
3852 {
3853 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3854 {
3855 if (PteDst.n.u1Write)
3856 {
3857 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3858 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3859 cErrors++;
3860 continue;
3861 }
3862 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3863 {
3864 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3865 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3866 cErrors++;
3867 continue;
3868 }
3869 if (PteDst.n.u1Dirty)
3870 {
3871 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3872 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3873 cErrors++;
3874 }
3875# if 0 /** @todo sync access bit properly... */
3876 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3877 {
3878 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3879 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3880 cErrors++;
3881 }
3882 fIgnoreFlags |= X86_PTE_RW;
3883# else
3884 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3885# endif
3886 }
3887 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3888 {
3889 /* access bit emulation (not implemented). */
3890 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3891 {
3892 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3893 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3894 cErrors++;
3895 continue;
3896 }
3897 if (!PteDst.n.u1Accessed)
3898 {
3899 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3900 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3901 cErrors++;
3902 }
3903 fIgnoreFlags |= X86_PTE_P;
3904 }
3905# ifdef DEBUG_sandervl
3906 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3907# endif
3908 }
3909
3910 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3911 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3912 )
3913 {
3914 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3915 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3916 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3917 cErrors++;
3918 continue;
3919 }
3920 } /* foreach PTE */
3921 }
3922 else
3923 {
3924 /*
3925 * Big Page.
3926 */
3927 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3928 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3929 {
3930 if (PdeDst.n.u1Write)
3931 {
3932 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3933 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3934 cErrors++;
3935 continue;
3936 }
3937 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3938 {
3939 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3940 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3941 cErrors++;
3942 continue;
3943 }
3944# if 0 /** @todo sync access bit properly... */
3945 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3946 {
3947 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3948 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3949 cErrors++;
3950 }
3951 fIgnoreFlags |= X86_PTE_RW;
3952# else
3953 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3954# endif
3955 }
3956 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3957 {
3958 /* access bit emulation (not implemented). */
3959 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3960 {
3961 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3962 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3963 cErrors++;
3964 continue;
3965 }
3966 if (!PdeDst.n.u1Accessed)
3967 {
3968 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3969 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3970 cErrors++;
3971 }
3972 fIgnoreFlags |= X86_PTE_P;
3973 }
3974
3975 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3976 {
3977 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3978 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3979 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3980 cErrors++;
3981 }
3982
3983 /* iterate the page table. */
3984 for (unsigned iPT = 0, off = 0;
3985 iPT < RT_ELEMENTS(pPTDst->a);
3986 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3987 {
3988 const SHWPTE PteDst = pPTDst->a[iPT];
3989
3990 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3991 {
3992 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3993 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3994 cErrors++;
3995 }
3996
3997 /* skip not-present entries. */
3998 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3999 continue;
4000
4001 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4002
4003 /* match the physical addresses */
4004 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4005
4006# ifdef IN_RING3
4007 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4008 if (RT_FAILURE(rc))
4009 {
4010 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4011 {
4012 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4013 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4014 cErrors++;
4015 }
4016 }
4017 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4018 {
4019 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4020 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4021 cErrors++;
4022 continue;
4023 }
4024# endif
4025 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4026 if (!pPhysPage)
4027 {
4028# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4029 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4030 {
4031 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4032 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4033 cErrors++;
4034 continue;
4035 }
4036# endif
4037 if (PteDst.n.u1Write)
4038 {
4039 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4040 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4041 cErrors++;
4042 }
4043 fIgnoreFlags |= X86_PTE_RW;
4044 }
4045 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4046 {
4047 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4048 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4049 cErrors++;
4050 continue;
4051 }
4052
4053 /* flags */
4054 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4055 {
4056 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4057 {
4058 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4059 {
4060 if (PteDst.n.u1Write)
4061 {
4062 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4063 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4064 cErrors++;
4065 continue;
4066 }
4067 fIgnoreFlags |= X86_PTE_RW;
4068 }
4069 }
4070 else
4071 {
4072 if (PteDst.n.u1Present)
4073 {
4074 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4075 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4076 cErrors++;
4077 continue;
4078 }
4079 fIgnoreFlags |= X86_PTE_P;
4080 }
4081 }
4082
4083 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4084 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4085 )
4086 {
4087 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4088 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4089 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4090 cErrors++;
4091 continue;
4092 }
4093 } /* for each PTE */
4094 }
4095 }
4096 /* not present */
4097
4098 } /* for each PDE */
4099
4100 } /* for each PDPTE */
4101
4102 } /* for each PML4E */
4103
4104# ifdef DEBUG
4105 if (cErrors)
4106 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4107# endif
4108
4109#endif /* GST == 32BIT, PAE or AMD64 */
4110 return cErrors;
4111
4112#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4113}
4114#endif /* VBOX_STRICT */
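
/*
 * Illustrative sketch: strict builds are assumed to run the AssertCR3 checker
 * above after a CR3 sync and to assert on a non-zero error count, along these
 * lines (the PGM_BTH_PFN() dispatch is an assumption here):
 */
#if 0 /* sketch only, VBOX_STRICT builds */
unsigned cErrors = PGM_BTH_PFN(AssertCR3, pVM)(pVM, CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM),
                                               0 /* GCPtr */, ~(RTGCPTR)0 /* cb */);
AssertMsg(!cErrors, ("cErrors=%d\n", cErrors));
#endif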
4115
4116
4117/**
4118 * Sets up the CR3 for shadow paging
4119 *
4120 * @returns Strict VBox status code.
4121 * @retval VINF_SUCCESS.
4122 *
4123 * @param pVM VM handle.
4124 * @param GCPhysCR3 The physical address in the CR3 register.
4125 */
4126PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3)
4127{
4128 /* Update guest paging info. */
4129#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4130 || PGM_GST_TYPE == PGM_TYPE_PAE \
4131 || PGM_GST_TYPE == PGM_TYPE_AMD64
4132
4133 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4134
4135 /*
4136 * Map the page CR3 points at.
4137 */
4138 RTHCPTR HCPtrGuestCR3;
4139 RTHCPHYS HCPhysGuestCR3;
4140# ifdef VBOX_WITH_NEW_PHYS_CODE
4141 /** @todo this needs some reworking. current code is just a big hack. */
4142# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4143# if 1 /* temp hack */
4144 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4145 return VINF_PGM_SYNC_CR3;
4146# else
4147 AssertFailedReturn(VERR_INTERNAL_ERROR);
4148# endif
4149 int rc = VERR_INTERNAL_ERROR;
4150# else
4151 pgmLock(pVM);
4152 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4153 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4154 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4155 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4156 pgmUnlock(pVM);
4157# endif
4158# else /* !VBOX_WITH_NEW_PHYS_CODE */
4159 int rc = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhysCR3 & GST_CR3_PAGE_MASK, &HCPtrGuestCR3, &HCPhysGuestCR3);
4160# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4161 if (RT_SUCCESS(rc))
4162 {
4163 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4164 if (RT_SUCCESS(rc))
4165 {
4166# ifdef IN_RC
4167 PGM_INVL_PG(pVM->pgm.s.GCPtrCR3Mapping);
4168# endif
4169# if PGM_GST_TYPE == PGM_TYPE_32BIT
4170 pVM->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4171# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4172 pVM->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4173# endif
4174 pVM->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4175
4176# elif PGM_GST_TYPE == PGM_TYPE_PAE
4177 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4178 pVM->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4179# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4180 pVM->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4181# endif
4182 pVM->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4183 Log(("Cached mapping %RRv\n", pVM->pgm.s.pGstPaePdptRC));
4184
4185 /*
4186 * Map the 4 PDs too.
4187 */
4188 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVM->pgm.s);
4189 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4190 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4191 {
4192 if (pGuestPDPT->a[i].n.u1Present)
4193 {
4194 RTHCPTR HCPtr;
4195 RTHCPHYS HCPhys;
4196 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4197# ifdef VBOX_WITH_NEW_PHYS_CODE
4198# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4199 AssertFailedReturn(VERR_INTERNAL_ERROR);
4200 int rc2 = VERR_INTERNAL_ERROR;
4201# else
4202 pgmLock(pVM);
4203 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4204 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4205 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4206 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4207 pgmUnlock(pVM);
4208# endif
4209# else /* !VBOX_WITH_NEW_PHYS_CODE */
4210 int rc2 = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
4211# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4212 if (RT_SUCCESS(rc2))
4213 {
4214 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4215 AssertRCReturn(rc, rc);
4216
4217 pVM->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4218# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4219 pVM->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4220# endif
4221 pVM->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4222 pVM->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4223 PGM_INVL_PG(GCPtr); /** @todo This ends up calling HWACCMInvalidatePage, is that correct? */
4224 continue;
4225 }
4226 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4227 }
4228
4229 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4230# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4231 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4232# endif
4233 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4234 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4235 PGM_INVL_PG(GCPtr); /** @todo this shouldn't be necessary? */
4236 }
4237
4238# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4239 pVM->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4240# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4241 pVM->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4242# endif
4243# endif
4244 }
4245 else
4246 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4247 }
4248 else
4249 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4250
4251#else /* prot/real stub */
4252 int rc = VINF_SUCCESS;
4253#endif
4254
4255 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4256# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4257 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4258 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4259 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4260 && PGM_GST_TYPE != PGM_TYPE_PROT))
4261
4262 Assert(!HWACCMIsNestedPagingActive(pVM));
4263
4264 /*
4265 * Update the shadow root page as well since that's not fixed.
4266 */
4267 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4268 PPGMPOOLPAGE pOldShwPageCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
4269 uint32_t iOldShwUserTable = pVM->pgm.s.iShwUserTable;
4270 uint32_t iOldShwUser = pVM->pgm.s.iShwUser;
4271 PPGMPOOLPAGE pNewShwPageCR3;
4272
4273 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4274 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4275 AssertFatalRC(rc);
4276 rc = VINF_SUCCESS;
4277
4278 /* Mark the page as locked; disallow flushing. */
4279 pgmPoolLockPage(pPool, pNewShwPageCR3);
4280
4281# ifdef IN_RC
4282 /* NOTE: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4283 bool fLog = VMMGCLogDisable(pVM);
4284# endif
4285
4286 pVM->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4287 pVM->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4288 pVM->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4289# ifdef IN_RING0
4290 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4291 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4292# elif defined(IN_RC)
4293 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4294 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4295# else
4296 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4297 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4298# endif
4299
4300# ifndef PGM_WITHOUT_MAPPINGS
4301 /* Apply all hypervisor mappings to the new CR3.
4302 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4303 * make sure we check for conflicts in the new CR3 root.
4304 */
4305# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4306 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL) || VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4307# endif
4308 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4309 AssertRCReturn(rc, rc);
4310# endif
4311
4312 /* Set the current hypervisor CR3. */
4313 CPUMSetHyperCR3(pVM, PGMGetHyperCR3(pVM));
4314 SELMShadowCR3Changed(pVM);
4315
4316# ifdef IN_RC
4317 VMMGCLogRestore(pVM, fLog);
4318# endif
4319
4320 /* Clean up the old CR3 root. */
4321 if (pOldShwPageCR3)
4322 {
4323 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4324# ifndef PGM_WITHOUT_MAPPINGS
4325 /* Remove the hypervisor mappings from the shadow page table. */
4326 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4327# endif
4328 /* Mark the page as unlocked; allow flushing again. */
4329 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4330
4331 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4332 }
4333
4334# endif
4335
4336 return rc;
4337}
4338
4339/**
4340 * Unmaps the shadow CR3.
4341 *
4342 * @returns VBox status, no specials.
4343 * @param pVM VM handle.
4344 */
4345PGM_BTH_DECL(int, UnmapCR3)(PVM pVM)
4346{
4347 LogFlow(("UnmapCR3\n"));
4348
4349 int rc = VINF_SUCCESS;
4350
4351 /* Update guest paging info. */
4352#if PGM_GST_TYPE == PGM_TYPE_32BIT
4353 pVM->pgm.s.pGst32BitPdR3 = 0;
4354#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4355 pVM->pgm.s.pGst32BitPdR0 = 0;
4356#endif
4357 pVM->pgm.s.pGst32BitPdRC = 0;
4358
4359#elif PGM_GST_TYPE == PGM_TYPE_PAE
4360 pVM->pgm.s.pGstPaePdptR3 = 0;
4361# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4362 pVM->pgm.s.pGstPaePdptR0 = 0;
4363# endif
4364 pVM->pgm.s.pGstPaePdptRC = 0;
4365 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4366 {
4367 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4368# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4369 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4370# endif
4371 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4372 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4373 }
4374
4375#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4376 pVM->pgm.s.pGstAmd64Pml4R3 = 0;
4377# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4378 pVM->pgm.s.pGstAmd64Pml4R0 = 0;
4379# endif
4380
4381#else /* prot/real mode stub */
4382 /* nothing to do */
4383#endif
4384
4385#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4386 /* Update shadow paging info. */
4387# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4388 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4389 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4390
4391# if PGM_GST_TYPE != PGM_TYPE_REAL
4392 Assert(!HWACCMIsNestedPagingActive(pVM));
4393# endif
4394
4395# ifndef PGM_WITHOUT_MAPPINGS
4396 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4397 /* Remove the hypervisor mappings from the shadow page table. */
4398 pgmMapDeactivateCR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4399# endif
4400
4401 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4402 {
4403 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4404
4405 Assert(pVM->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4406
4407 /* Mark the page as unlocked; allow flushing again. */
4408 pgmPoolUnlockPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4409
4410 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), pVM->pgm.s.iShwUser, pVM->pgm.s.iShwUserTable);
4411 pVM->pgm.s.pShwPageCR3R3 = 0;
4412 pVM->pgm.s.pShwPageCR3R0 = 0;
4413 pVM->pgm.s.pShwPageCR3RC = 0;
4414 pVM->pgm.s.iShwUser = 0;
4415 pVM->pgm.s.iShwUserTable = 0;
4416 }
4417# endif
4418#endif /* !IN_RC*/
4419
4420 return rc;
4421}
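
/*
 * Illustrative sketch: on a guest paging mode switch the old mode's UnmapCR3()
 * is assumed to tear the previous root down before the new mode's MapCR3()
 * installs the replacement, roughly in this order (the PGM_BTH_PFN() dispatch
 * and GCPhysNewCR3 are illustrative):
 */
#if 0 /* sketch only */
int rc = PGM_BTH_PFN(UnmapCR3, pVM)(pVM);
AssertRC(rc);
rc = PGM_BTH_PFN(MapCR3, pVM)(pVM, GCPhysNewCR3 & GST_CR3_PAGE_MASK);
AssertRC(rc);
#endif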
4422