1/* $Id: PGMAllBth.h 18192 2009-03-24 14:40:03Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVM pVM);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
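/* How this template is typically instantiated (illustrative sketch only; the
 * exact wrapper macros live in the PGM sources that include this header and
 * may use different names than the ones shown here):
 *
 *     #define PGM_GST_TYPE        PGM_TYPE_32BIT
 *     #define PGM_SHW_TYPE        PGM_TYPE_32BIT
 *     #define PGM_BTH_NAME(name)  pgmBth32Bit32Bit ## name
 *     #include "PGMAllBth.h"
 *     #undef  PGM_GST_TYPE
 *     #undef  PGM_SHW_TYPE
 *     #undef  PGM_BTH_NAME
 *
 * Each guest/shadow combination accepted by the checks above gets its own copy
 * of the functions declared at the top of this file.
 */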
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 * @param pVM VM Handle.
78 * @param uErr The trap error code.
79 * @param pRegFrame Trap register frame.
80 * @param pvFault The fault address.
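 *
 * @remark Normally reached via the current paging mode's dispatch table; the
 *         member name below is purely illustrative, not the actual field:
 * @code
 *     rc = pVM->pgm.s.CTX_SUFF(pfnBthTrap0eHandler)(pVM, uErr, pRegFrame, pvFault);
 * @endcode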
81 */
82PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
83{
84# if defined(IN_RC) && defined(VBOX_STRICT)
85 PGMDynCheckLocks(pVM);
86# endif
87
88# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
89 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
90 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
91
92# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
93 /*
94 * Hide the instruction fetch trap indicator for now.
95 */
96 /** @todo NXE will change this and we must fix NXE in the switcher too! */
97 if (uErr & X86_TRAP_PF_ID)
98 {
99 uErr &= ~X86_TRAP_PF_ID;
100 TRPMSetErrorCode(pVM, uErr);
101 }
102# endif
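 /* For reference: the #PF error code bits tested throughout this function use
  * the standard x86 semantics (the X86_TRAP_PF_* constants are assumed to
  * follow that layout):
  *     X86_TRAP_PF_P  (bit 0) - 0 = page not present, 1 = protection violation.
  *     X86_TRAP_PF_RW (bit 1) - set for write accesses.
  *     X86_TRAP_PF_US (bit 2) - set for user-mode accesses.
  *     X86_TRAP_PF_ID (bit 4) - instruction fetch (NX/EPT capable CPUs). */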
103
104 /*
105 * Get PDs.
106 */
107 int rc;
108# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
109# if PGM_GST_TYPE == PGM_TYPE_32BIT
110 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
111 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
112
113# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
114
115# if PGM_GST_TYPE == PGM_TYPE_PAE
116 unsigned iPDSrc;
117 X86PDPE PdpeSrc;
118 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
119
120# elif PGM_GST_TYPE == PGM_TYPE_AMD64
121 unsigned iPDSrc;
122 PX86PML4E pPml4eSrc;
123 X86PDPE PdpeSrc;
124 PGSTPD pPDSrc;
125
126 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
127 Assert(pPml4eSrc);
128# endif
129
130 /* Quick check for a valid guest trap. (PAE & AMD64) */
131 if (!pPDSrc)
132 {
133# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
134 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
135# else
136 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
137# endif
138 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
139 TRPMSetErrorCode(pVM, uErr);
140 return VINF_EM_RAW_GUEST_TRAP;
141 }
142# endif
143
144# else /* !PGM_WITH_PAGING */
145 PGSTPD pPDSrc = NULL;
146 const unsigned iPDSrc = 0;
147# endif /* !PGM_WITH_PAGING */
148
149
150# if PGM_SHW_TYPE == PGM_TYPE_32BIT
151 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
152 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
153
154# elif PGM_SHW_TYPE == PGM_TYPE_PAE
155 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
156
157 PX86PDPAE pPDDst;
158# if PGM_GST_TYPE != PGM_TYPE_PAE
159 X86PDPE PdpeSrc;
160
161 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
162 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
163# endif
164 rc = pgmShwSyncPaePDPtr(pVM, pvFault, &PdpeSrc, &pPDDst);
165 if (rc != VINF_SUCCESS)
166 {
167 AssertRC(rc);
168 return rc;
169 }
170 Assert(pPDDst);
171
172# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
173 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
174 PX86PDPAE pPDDst;
175# if PGM_GST_TYPE == PGM_TYPE_PROT
176 /* AMD-V nested paging */
177 X86PML4E Pml4eSrc;
178 X86PDPE PdpeSrc;
179 PX86PML4E pPml4eSrc = &Pml4eSrc;
180
181 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
182 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
183 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
184# endif
185
186 rc = pgmShwSyncLongModePDPtr(pVM, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
187 if (rc != VINF_SUCCESS)
188 {
189 AssertRC(rc);
190 return rc;
191 }
192 Assert(pPDDst);
193
194# elif PGM_SHW_TYPE == PGM_TYPE_EPT
195 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
196 PEPTPD pPDDst;
197
198 rc = pgmShwGetEPTPDPtr(pVM, pvFault, NULL, &pPDDst);
199 if (rc != VINF_SUCCESS)
200 {
201 AssertRC(rc);
202 return rc;
203 }
204 Assert(pPDDst);
205# endif
206
207# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
208 /*
209 * If we successfully correct the write protection fault due to dirty bit
210 * tracking, or this page fault is a genuine one, then return immediately.
211 */
212 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
213 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
214 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
215 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
216 || rc == VINF_EM_RAW_GUEST_TRAP)
217 {
218 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
219 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
220 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
221 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
222 }
223
224 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0ePD[iPDSrc]);
225# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
226
227 /*
228 * A common case is the not-present error caused by lazy page table syncing.
229 *
230 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
231 * so we can safely assume that the shadow PT is present when calling SyncPage later.
232 *
233 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
234 * of mapping conflict and defer to SyncCR3 in R3.
235 * (Again, we do NOT support access handlers for non-present guest pages.)
236 *
237 */
238# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
239 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
240# else
241 GSTPDE PdeSrc;
242 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
243 PdeSrc.n.u1Present = 1;
244 PdeSrc.n.u1Write = 1;
245 PdeSrc.n.u1Accessed = 1;
246 PdeSrc.n.u1User = 1;
247# endif
248 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
249 && !pPDDst->a[iPDDst].n.u1Present
250 && PdeSrc.n.u1Present
251 )
252
253 {
254 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2SyncPT; });
255 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
256 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
257 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, pvFault);
258 if (RT_SUCCESS(rc))
259 {
260 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
261 return rc;
262 }
263 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
264 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
265 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
266 return VINF_PGM_SYNC_CR3;
267 }
268
269# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
270 /*
271 * Check if this address is within any of our mappings.
272 *
273 * This is *very* fast and it's gonna save us a bit of effort below and prevent
274 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
275 * (BTW, it's impossible to have physical access handlers in a mapping.)
276 */
277 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
278 {
279 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
280 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
281 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
282 {
283 if (pvFault < pMapping->GCPtr)
284 break;
285 if (pvFault - pMapping->GCPtr < pMapping->cb)
286 {
287 /*
288 * The first thing we check is if we've got an undetected conflict.
289 */
290 if (!pVM->pgm.s.fMappingsFixed)
291 {
292 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
293 while (iPT-- > 0)
294 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
295 {
296 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eConflicts);
297 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
 298 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
299 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
300 return VINF_PGM_SYNC_CR3;
301 }
302 }
303
304 /*
305 * Check if the fault address is in a virtual page access handler range.
306 */
307 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
308 if ( pCur
309 && pvFault - pCur->Core.Key < pCur->cb
310 && uErr & X86_TRAP_PF_RW)
311 {
312# ifdef IN_RC
313 STAM_PROFILE_START(&pCur->Stat, h);
314 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
315 STAM_PROFILE_STOP(&pCur->Stat, h);
316# else
317 AssertFailed();
318 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
319# endif
320 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersMapping);
321 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
322 return rc;
323 }
324
325 /*
326 * Pretend we're not here and let the guest handle the trap.
327 */
328 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
329 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFMapping);
330 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
331 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
332 return VINF_EM_RAW_GUEST_TRAP;
333 }
334 }
335 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
 336 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
337# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
338
339 /*
340 * Check if this fault address is flagged for special treatment,
341 * which means we'll have to figure out the physical address and
342 * check flags associated with it.
343 *
344 * ASSUME that we can limit any special access handling to pages
345 * in page tables which the guest believes to be present.
346 */
347 if (PdeSrc.n.u1Present)
348 {
349 RTGCPHYS GCPhys = NIL_RTGCPHYS;
350
351# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
352# if PGM_GST_TYPE == PGM_TYPE_AMD64
353 bool fBigPagesSupported = true;
354# else
355 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
356# endif
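 /* Worked example (illustrative only; assumes a 32-bit guest with a 4MB page,
  * so GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK == 0x003ff000):
  *     PDE frame = 0x00c00000, pvFault = 0x00d23456
  *     GCPhys    = 0x00c00000 | (0x00d23456 & 0x003ff000) = 0x00d23000
  * i.e. the 4KB page frame of the faulting address within the big page. */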
357 if ( PdeSrc.b.u1Size
358 && fBigPagesSupported)
359 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
360 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
361 else
362 {
363 PGSTPT pPTSrc;
364 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
365 if (RT_SUCCESS(rc))
366 {
367 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
368 if (pPTSrc->a[iPTESrc].n.u1Present)
369 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
370 }
371 }
372# else
373 /* No paging so the fault address is the physical address */
374 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
375# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
376
377 /*
378 * If we have a GC address we'll check if it has any flags set.
379 */
380 if (GCPhys != NIL_RTGCPHYS)
381 {
382 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
383
384 PPGMPAGE pPage;
385 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
 386 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
387 {
388 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
389 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
390 {
391 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
392 {
393 /*
394 * Physical page access handler.
395 */
396 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
397 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
398 if (pCur)
399 {
400# ifdef PGM_SYNC_N_PAGES
401 /*
402 * If the region is write protected and we got a page not present fault, then sync
403 * the pages. If the fault was caused by a read, then restart the instruction.
404 * In case of write access continue to the GC write handler.
405 *
406 * ASSUMES that there is only one handler per page or that they have similar write properties.
407 */
408 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
409 && !(uErr & X86_TRAP_PF_P))
410 {
411 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
412 if ( RT_FAILURE(rc)
413 || !(uErr & X86_TRAP_PF_RW)
414 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
415 {
416 AssertRC(rc);
417 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
418 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
419 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
420 return rc;
421 }
422 }
423# endif
424
425 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
426 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
427 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
428
429# if defined(IN_RC) || defined(IN_RING0)
430 if (pCur->CTX_SUFF(pfnHandler))
431 {
432 STAM_PROFILE_START(&pCur->Stat, h);
433 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
434 STAM_PROFILE_STOP(&pCur->Stat, h);
435 }
436 else
437# endif
438 rc = VINF_EM_RAW_EMULATE_INSTR;
439 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersPhysical);
440 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
441 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndPhys; });
442 return rc;
443 }
444 }
445# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
446 else
447 {
448# ifdef PGM_SYNC_N_PAGES
449 /*
450 * If the region is write protected and we got a page not present fault, then sync
451 * the pages. If the fault was caused by a read, then restart the instruction.
452 * In case of write access continue to the GC write handler.
453 */
454 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
455 && !(uErr & X86_TRAP_PF_P))
456 {
457 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
458 if ( RT_FAILURE(rc)
459 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
460 || !(uErr & X86_TRAP_PF_RW))
461 {
462 AssertRC(rc);
463 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
464 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
465 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
466 return rc;
467 }
468 }
469# endif
470 /*
 471 * Ok, it's a virtual page access handler.
472 *
473 * Since it's faster to search by address, we'll do that first
474 * and then retry by GCPhys if that fails.
475 */
476 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
477 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
478 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
479 */
480 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
481 if (pCur)
482 {
483 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
484 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
485 || !(uErr & X86_TRAP_PF_P)
486 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
487 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
488
489 if ( pvFault - pCur->Core.Key < pCur->cb
490 && ( uErr & X86_TRAP_PF_RW
491 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
492 {
493# ifdef IN_RC
494 STAM_PROFILE_START(&pCur->Stat, h);
495 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
496 STAM_PROFILE_STOP(&pCur->Stat, h);
497# else
498 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
499# endif
500 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtual);
501 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
502 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
503 return rc;
504 }
505 /* Unhandled part of a monitored page */
506 }
507 else
508 {
509 /* Check by physical address. */
510 PPGMVIRTHANDLER pCur;
511 unsigned iPage;
512 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
513 &pCur, &iPage);
514 Assert(RT_SUCCESS(rc) || !pCur);
515 if ( pCur
516 && ( uErr & X86_TRAP_PF_RW
517 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
518 {
519 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
520# ifdef IN_RC
521 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
522 Assert(off < pCur->cb);
523 STAM_PROFILE_START(&pCur->Stat, h);
524 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
525 STAM_PROFILE_STOP(&pCur->Stat, h);
526# else
527 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
528# endif
529 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
530 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
531 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
532 return rc;
533 }
534 }
535 }
536# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
537
538 /*
539 * There is a handled area of the page, but this fault doesn't belong to it.
540 * We must emulate the instruction.
541 *
 542 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
543 * we first check if this was a page-not-present fault for a page with only
544 * write access handlers. Restart the instruction if it wasn't a write access.
545 */
546 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersUnhandled);
547
548 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
549 && !(uErr & X86_TRAP_PF_P))
550 {
551 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
552 if ( RT_FAILURE(rc)
553 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
554 || !(uErr & X86_TRAP_PF_RW))
555 {
556 AssertRC(rc);
557 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
558 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
559 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
560 return rc;
561 }
562 }
563
 564 /** @todo This particular case can cause quite a lot of overhead, e.g. the early stage of kernel booting in Ubuntu 6.06,
 565 * which writes to an unhandled part of the LDT page several million times.
566 */
567 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
568 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
569 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
570 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndUnhandled; });
571 return rc;
572 } /* if any kind of handler */
573
574# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
575 if (uErr & X86_TRAP_PF_P)
576 {
577 /*
578 * The page isn't marked, but it might still be monitored by a virtual page access handler.
579 * (ASSUMES no temporary disabling of virtual handlers.)
580 */
581 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
582 * we should correct both the shadow page table and physical memory flags, and not only check for
583 * accesses within the handler region but for access to pages with virtual handlers. */
584 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
585 if (pCur)
586 {
587 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
588 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
589 || !(uErr & X86_TRAP_PF_P)
590 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
591 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
592
593 if ( pvFault - pCur->Core.Key < pCur->cb
594 && ( uErr & X86_TRAP_PF_RW
595 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
596 {
597# ifdef IN_RC
598 STAM_PROFILE_START(&pCur->Stat, h);
599 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
600 STAM_PROFILE_STOP(&pCur->Stat, h);
601# else
602 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
603# endif
604 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
605 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
606 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
607 return rc;
608 }
609 }
610 }
611# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
612 }
613 else
614 {
615 /*
616 * When the guest accesses invalid physical memory (e.g. probing
617 * of RAM or accessing a remapped MMIO range), then we'll fall
618 * back to the recompiler to emulate the instruction.
619 */
620 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
621 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersInvalid);
622 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
623 return VINF_EM_RAW_EMULATE_INSTR;
624 }
625
626 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
627
628# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
629 /*
630 * We are here only if page is present in Guest page tables and
631 * trap is not handled by our handlers.
632 *
633 * Check it for page out-of-sync situation.
634 */
635 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
636
637 if (!(uErr & X86_TRAP_PF_P))
638 {
639 /*
640 * Page is not present in our page tables.
641 * Try to sync it!
642 * BTW, fPageShw is invalid in this branch!
643 */
644 if (uErr & X86_TRAP_PF_US)
645 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
646 else /* supervisor */
647 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
648
649# if defined(LOG_ENABLED) && !defined(IN_RING0)
650 RTGCPHYS GCPhys;
651 uint64_t fPageGst;
652 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
653 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
654 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
655# endif /* LOG_ENABLED */
656
657# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
658 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
659 {
660 uint64_t fPageGst;
661 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
662 if ( RT_SUCCESS(rc)
663 && !(fPageGst & X86_PTE_US))
664 {
665 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
666 if ( pvFault == (RTGCPTR)pRegFrame->eip
667 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
668# ifdef CSAM_DETECT_NEW_CODE_PAGES
669 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
670 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
671# endif /* CSAM_DETECT_NEW_CODE_PAGES */
672 )
673 {
674 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
675 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
676 if (rc != VINF_SUCCESS)
677 {
678 /*
679 * CSAM needs to perform a job in ring 3.
680 *
681 * Sync the page before going to the host context; otherwise we'll end up in a loop if
682 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
683 */
684 LogFlow(("CSAM ring 3 job\n"));
685 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
686 AssertRC(rc2);
687
688 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
689 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2CSAM; });
690 return rc;
691 }
692 }
693# ifdef CSAM_DETECT_NEW_CODE_PAGES
694 else if ( uErr == X86_TRAP_PF_RW
695 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
696 && pRegFrame->ecx < 0x10000)
697 {
698 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
699 * to detect loading of new code pages.
700 */
701
702 /*
703 * Decode the instruction.
704 */
705 RTGCPTR PC;
706 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
707 if (rc == VINF_SUCCESS)
708 {
709 DISCPUSTATE Cpu;
710 uint32_t cbOp;
711 rc = EMInterpretDisasOneEx(pVM, PC, pRegFrame, &Cpu, &cbOp);
712
713 /* For now we'll restrict this to rep movsw/d instructions */
714 if ( rc == VINF_SUCCESS
715 && Cpu.pCurInstr->opcode == OP_MOVSWD
716 && (Cpu.prefix & PREFIX_REP))
717 {
718 CSAMMarkPossibleCodePage(pVM, pvFault);
719 }
720 }
721 }
722# endif /* CSAM_DETECT_NEW_CODE_PAGES */
723
724 /*
725 * Mark this page as safe.
726 */
727 /** @todo not correct for pages that contain both code and data!! */
728 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
729 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
730 }
731 }
732# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
733 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
734 if (RT_SUCCESS(rc))
735 {
736 /* The page was successfully synced, return to the guest. */
737 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
738 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSync; });
739 return VINF_SUCCESS;
740 }
741 }
742 else /* uErr & X86_TRAP_PF_P: */
743 {
744 /*
 745 * Write-protected pages are made writable when the guest performs the first
 746 * write to them. This happens for pages that are shared, write monitored
 747 * and not yet allocated.
 748 *
 749 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
 750 * to physically monitored regions that are no longer valid.
751 * Assume for now it only applies to the read/write flag.
752 */
753 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
754 {
755# ifdef VBOX_WITH_NEW_PHYS_CODE
756 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
757 {
758 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
759 GCPhys, pPage, pvFault, uErr));
760 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
761 if (rc != VINF_SUCCESS)
762 {
763 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
764 return rc;
765 }
766 }
767 /// @todo count the above case; else
768# endif /* VBOX_WITH_NEW_PHYS_CODE */
769 if (uErr & X86_TRAP_PF_US)
770 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
771 else /* supervisor */
772 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
773
774 /*
775 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
776 * page is not present, which is not true in this case.
777 */
778 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
779 if (RT_SUCCESS(rc))
780 {
781 /*
782 * Page was successfully synced, return to guest.
783 */
784# ifdef VBOX_STRICT
785 RTGCPHYS GCPhys;
786 uint64_t fPageGst;
787 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
788 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
789 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
790
791 uint64_t fPageShw;
792 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
793 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
794# endif /* VBOX_STRICT */
795 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
796 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
797 return VINF_SUCCESS;
798 }
799
800 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
801 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
802 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
803 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
804 {
805 uint64_t fPageGst;
806 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
807 if ( RT_SUCCESS(rc)
808 && !(fPageGst & X86_PTE_RW))
809 {
810 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
811 if (RT_SUCCESS(rc))
812 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulInRZ);
813 else
814 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulToR3);
815 return rc;
816 }
817 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
818 }
819 }
820
821# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
822# ifdef VBOX_STRICT
823 /*
824 * Check for VMM page flags vs. Guest page flags consistency.
825 * Currently only for debug purposes.
826 */
827 if (RT_SUCCESS(rc))
828 {
829 /* Get guest page flags. */
830 uint64_t fPageGst;
831 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
832 if (RT_SUCCESS(rc))
833 {
834 uint64_t fPageShw;
835 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
836
837 /*
838 * Compare page flags.
839 * Note: we have AVL, A, D bits desynched.
840 */
841 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
842 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
843 }
844 else
845 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
846 }
847 else
848 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
849# endif /* VBOX_STRICT */
850# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
851 }
852 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
853# endif /* PGM_OUT_OF_SYNC_IN_GC */
854 }
855 else /* GCPhys == NIL_RTGCPHYS */
856 {
857 /*
858 * Page not present in Guest OS or invalid page table address.
859 * This is potential virtual page access handler food.
860 *
861 * For the present we'll say that our access handlers don't
862 * work for this case - we've already discarded the page table
863 * not present case which is identical to this.
864 *
865 * When we perchance find we need this, we will probably have AVL
866 * trees (offset based) to operate on and we can measure their speed
 867 * against mapping a page table and probably rearrange this handling
868 * a bit. (Like, searching virtual ranges before checking the
869 * physical address.)
870 */
871 }
872 }
873 /* else: !present (guest) */
874
875
876# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
877 /*
878 * Conclusion, this is a guest trap.
879 */
880 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
881 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFUnh);
882 return VINF_EM_RAW_GUEST_TRAP;
883# else
884 /* present, but not a monitored page; perhaps the guest is probing physical memory */
885 return VINF_EM_RAW_EMULATE_INSTR;
886# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
887
888
889# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
890
891 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
892 return VERR_INTERNAL_ERROR;
893# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
894}
895#endif /* !IN_RING3 */
896
897
898/**
899 * Emulation of the invlpg instruction.
900 *
901 *
902 * @returns VBox status code.
903 *
904 * @param pVM VM handle.
905 * @param GCPtrPage Page to invalidate.
906 *
907 * @remark ASSUMES that the guest is updating before invalidating. This order
908 * isn't required by the CPU, so this is speculative and could cause
909 * trouble.
910 *
911 * @todo Flush page or page directory only if necessary!
912 * @todo Add a #define for simply invalidating the page.
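 *
 * @remark Illustrative call site (hypothetical and simplified; the real caller
 *         is the generic invlpg emulation, which dispatches through the active
 *         paging mode):
 * @code
 *     rc = PGM_BTH_NAME(InvalidatePage)(pVM, GCPtrPage);
 *     AssertRC(rc);
 * @endcode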
913 */
914PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage)
915{
916#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
917 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
918 && PGM_SHW_TYPE != PGM_TYPE_EPT
919 int rc;
920
921 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
922 /*
923 * Get the shadow PD entry and skip out if this PD isn't present.
924 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
925 */
926# if PGM_SHW_TYPE == PGM_TYPE_32BIT
927 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
928 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
929
930 /* Fetch the pgm pool shadow descriptor. */
931 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
932 Assert(pShwPde);
933
934# elif PGM_SHW_TYPE == PGM_TYPE_PAE
935 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
936 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
937
938 /* If the shadow PDPE isn't present, then skip the invalidate. */
939 if (!pPdptDst->a[iPdpt].n.u1Present)
940 {
941 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
942 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
943 return VINF_SUCCESS;
944 }
945
946 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
947 PPGMPOOLPAGE pShwPde;
948 PX86PDPAE pPDDst;
949
950 /* Fetch the pgm pool shadow descriptor. */
951 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
952 AssertRCSuccessReturn(rc, rc);
953 Assert(pShwPde);
954
955 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
956 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
957
958# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
959 /* PML4 */
960 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
961 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
962 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
963 PX86PDPAE pPDDst;
964 PX86PDPT pPdptDst;
965 PX86PML4E pPml4eDst;
966 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
967 if (rc != VINF_SUCCESS)
968 {
969 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
970 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
971 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
972 PGM_INVL_GUEST_TLBS();
973 return VINF_SUCCESS;
974 }
975 Assert(pPDDst);
976
977 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
978 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
979
980 if (!pPdpeDst->n.u1Present)
981 {
982 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
983 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
984 PGM_INVL_GUEST_TLBS();
985 return VINF_SUCCESS;
986 }
987
988# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
989
990 const SHWPDE PdeDst = *pPdeDst;
991 if (!PdeDst.n.u1Present)
992 {
993 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
994 return VINF_SUCCESS;
995 }
996
997# if defined(IN_RC)
998 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
999 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1000# endif
1001
1002 /*
1003 * Get the guest PD entry and calc big page.
1004 */
1005# if PGM_GST_TYPE == PGM_TYPE_32BIT
1006 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
1007 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1008 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1009# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1010 unsigned iPDSrc;
1011# if PGM_GST_TYPE == PGM_TYPE_PAE
1012 X86PDPE PdpeSrc;
1013 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1014# else /* AMD64 */
1015 PX86PML4E pPml4eSrc;
1016 X86PDPE PdpeSrc;
1017 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1018# endif
1019 GSTPDE PdeSrc;
1020
1021 if (pPDSrc)
1022 PdeSrc = pPDSrc->a[iPDSrc];
1023 else
1024 PdeSrc.u = 0;
1025# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1026
1027# if PGM_GST_TYPE == PGM_TYPE_AMD64
1028 const bool fIsBigPage = PdeSrc.b.u1Size;
1029# else
1030 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1031# endif
1032
1033# ifdef IN_RING3
1034 /*
1035 * If a CR3 Sync is pending we may ignore the invalidate page operation
 1036 * depending on the kind of sync and whether it's a global page or not.
1037 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1038 */
1039# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1040 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
1041 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
1042 && fIsBigPage
1043 && PdeSrc.b.u1Global
1044 )
1045 )
1046# else
1047 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1048# endif
1049 {
1050 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1051 return VINF_SUCCESS;
1052 }
1053# endif /* IN_RING3 */
1054
1055# if PGM_GST_TYPE == PGM_TYPE_AMD64
1056 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1057
1058 /* Fetch the pgm pool shadow descriptor. */
1059 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
1060 Assert(pShwPdpt);
1061
1062 /* Fetch the pgm pool shadow descriptor. */
1063 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1064 Assert(pShwPde);
1065
1066 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1067 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1068
1069 if ( !pPml4eSrc->n.u1Present
1070 || pShwPdpt->GCPhys != GCPhysPdpt)
1071 {
1072 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1073 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1074 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1075 pPml4eDst->u = 0;
1076 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1077 PGM_INVL_GUEST_TLBS();
1078 return VINF_SUCCESS;
1079 }
1080 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1081 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1082 {
1083 /*
1084 * Mark not present so we can resync the PML4E when it's used.
1085 */
1086 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1087 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1088 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1089 pPml4eDst->u = 0;
1090 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1091 PGM_INVL_GUEST_TLBS();
1092 }
1093 else if (!pPml4eSrc->n.u1Accessed)
1094 {
1095 /*
1096 * Mark not present so we can set the accessed bit.
1097 */
1098 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1099 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1100 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1101 pPml4eDst->u = 0;
1102 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1103 PGM_INVL_GUEST_TLBS();
1104 }
1105
1106 /* Check if the PDPT entry has changed. */
1107 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1108 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1109 if ( !PdpeSrc.n.u1Present
1110 || pShwPde->GCPhys != GCPhysPd)
1111 {
1112 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1113 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1114 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1115 pPdpeDst->u = 0;
1116 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1117 PGM_INVL_GUEST_TLBS();
1118 return VINF_SUCCESS;
1119 }
1120 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1121 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1122 {
1123 /*
1124 * Mark not present so we can resync the PDPTE when it's used.
1125 */
1126 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1127 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1128 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1129 pPdpeDst->u = 0;
1130 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1131 PGM_INVL_GUEST_TLBS();
1132 }
1133 else if (!PdpeSrc.lm.u1Accessed)
1134 {
1135 /*
1136 * Mark not present so we can set the accessed bit.
1137 */
1138 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1139 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1140 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1141 pPdpeDst->u = 0;
1142 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1143 PGM_INVL_GUEST_TLBS();
1144 }
1145# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1146
1147
1148 /*
1149 * Deal with the Guest PDE.
1150 */
1151 rc = VINF_SUCCESS;
1152 if (PdeSrc.n.u1Present)
1153 {
1154 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1155 {
1156 /*
1157 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1158 */
1159 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1160 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1161 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1162 }
1163 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1164 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1165 {
1166 /*
1167 * Mark not present so we can resync the PDE when it's used.
1168 */
1169 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1170 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1171 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1172 pPdeDst->u = 0;
1173 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1174 PGM_INVL_GUEST_TLBS();
1175 }
1176 else if (!PdeSrc.n.u1Accessed)
1177 {
1178 /*
1179 * Mark not present so we can set the accessed bit.
1180 */
1181 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1182 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1183 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1184 pPdeDst->u = 0;
1185 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1186 PGM_INVL_GUEST_TLBS();
1187 }
1188 else if (!fIsBigPage)
1189 {
1190 /*
1191 * 4KB - page.
1192 */
1193 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1194 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1195# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1196 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1197 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1198# endif
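 /* Illustration (inferred from the adjustment above): a 32-bit guest page table
  * maps 4MB with 1024 entries, while a PAE shadow page table maps 2MB with 512
  * entries, so each guest PT is backed by two shadow PTs; odd shadow PD slots
  * correspond to the upper 2KB half of the guest PT, hence the PAGE_SIZE/2. */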
1199 if (pShwPage->GCPhys == GCPhys)
1200 {
1201# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1202 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1203 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1204 if (pPT->a[iPTEDst].n.u1Present)
1205 {
1206# ifdef PGMPOOL_WITH_USER_TRACKING
1207 /* This is very unlikely with caching/monitoring enabled. */
1208 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1209# endif
1210 pPT->a[iPTEDst].u = 0;
1211 }
1212# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1213 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1214 if (RT_SUCCESS(rc))
1215 rc = VINF_SUCCESS;
1216# endif
1217 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1218 PGM_INVL_PG(GCPtrPage);
1219 }
1220 else
1221 {
1222 /*
1223 * The page table address changed.
1224 */
1225 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1226 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1227 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1228 pPdeDst->u = 0;
1229 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1230 PGM_INVL_GUEST_TLBS();
1231 }
1232 }
1233 else
1234 {
1235 /*
1236 * 2/4MB - page.
1237 */
1238 /* Before freeing the page, check if anything really changed. */
1239 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1240 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1241# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
 1242 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1243 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1244# endif
1245 if ( pShwPage->GCPhys == GCPhys
1246 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1247 {
 1248 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1249 /** @todo PAT */
1250 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1251 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1252 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1253 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1254 {
1255 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1256 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1257# if defined(IN_RC)
1258 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1259 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1260# endif
1261 return VINF_SUCCESS;
1262 }
1263 }
1264
1265 /*
1266 * Ok, the page table is present and it's been changed in the guest.
1267 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1268 * We could do this for some flushes in GC too, but we need an algorithm for
 1269 * deciding which 4MB pages contain code that is likely to be executed very soon.
1270 */
1271 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1272 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1273 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1274 pPdeDst->u = 0;
1275 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1276 PGM_INVL_BIG_PG(GCPtrPage);
1277 }
1278 }
1279 else
1280 {
1281 /*
1282 * Page directory is not present, mark shadow PDE not present.
1283 */
1284 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1285 {
1286 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1287 pPdeDst->u = 0;
1288 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1289 PGM_INVL_PG(GCPtrPage);
1290 }
1291 else
1292 {
1293 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1294 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1295 }
1296 }
1297# if defined(IN_RC)
1298 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1299 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1300# endif
1301 return rc;
1302
1303#else /* guest real and protected mode */
1304 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1305 return VINF_SUCCESS;
1306#endif
1307}
1308
1309
1310#ifdef PGMPOOL_WITH_USER_TRACKING
1311/**
1312 * Update the tracking of shadowed pages.
1313 *
1314 * @param pVM The VM handle.
1315 * @param pShwPage The shadow page.
 1316 * @param HCPhys The physical page that is being dereferenced.
1317 */
1318DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1319{
1320# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1321 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1322 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1323
 1324 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1325 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1326 * 2. write protect all shadowed pages. I.e. implement caching.
1327 */
1328 /*
1329 * Find the guest address.
1330 */
1331 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1332 pRam;
1333 pRam = pRam->CTX_SUFF(pNext))
1334 {
1335 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1336 while (iPage-- > 0)
1337 {
1338 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1339 {
1340 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1341 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1342 pShwPage->cPresent--;
1343 pPool->cPresent--;
1344 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1345 return;
1346 }
1347 }
1348 }
1349
1350 for (;;)
1351 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1352# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1353 pShwPage->cPresent--;
1354 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1355# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1356}
1357
1358
1359/**
1360 * Update the tracking of shadowed pages.
1361 *
1362 * @param pVM The VM handle.
1363 * @param pShwPage The shadow page.
 1364 * @param u16 The top 16 bits of the pPage->HCPhys.
 1365 * @param pPage Pointer to the guest page. This will be modified.
1366 * @param iPTDst The index into the shadow table.
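 *
 * @remark The 16-bit tracking value appears to pack a reference count together
 *         with the owning shadow page index (cf. PGMPOOL_TD_MAKE below); a
 *         value of zero means no shadow page table references the guest page
 *         yet.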
1367 */
1368DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1369{
1370# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1371 /*
1372 * Just deal with the simple first time here.
1373 */
1374 if (!u16)
1375 {
1376 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1377 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1378 }
1379 else
1380 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1381
1382 /* write back */
1383 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1384 PGM_PAGE_SET_TRACKING(pPage, u16);
1385
1386# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1387
1388 /* update statistics. */
1389 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1390 pShwPage->cPresent++;
1391 if (pShwPage->iFirstPresent > iPTDst)
1392 pShwPage->iFirstPresent = iPTDst;
1393}
1394#endif /* PGMPOOL_WITH_USER_TRACKING */
1395
1396
1397/**
1398 * Creates a 4K shadow page for a guest page.
1399 *
 1400 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
 1401 * physical address. Only the flags of the PdeSrc argument are used. No page structures
 1402 * will be mapped in this function.
1403 *
1404 * @param pVM VM handle.
1405 * @param pPteDst Destination page table entry.
1406 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1407 * Can safely assume that only the flags are being used.
1408 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1409 * @param pShwPage Pointer to the shadow page.
1410 * @param iPTDst The index into the shadow table.
1411 *
1412 * @remark Not used for 2/4MB pages!
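 *
 * @remark Illustrative call from a page sync loop (the variable names are
 *         assumptions, not taken from the actual callers):
 * @code
 *     PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc,
 *                                  pPTSrc->a[iPTSrc], pShwPage, iPTDst);
 * @endcode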
1413 */
1414DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1415{
1416 if (PteSrc.n.u1Present)
1417 {
1418 /*
1419 * Find the ram range.
1420 */
1421 PPGMPAGE pPage;
1422 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1423 if (RT_SUCCESS(rc))
1424 {
1425#ifdef VBOX_WITH_NEW_PHYS_CODE
1426# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1427 /* Try make the page writable if necessary. */
1428 if ( PteSrc.n.u1Write
1429 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1430 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1431 {
1432 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1433 AssertRC(rc);
1434 }
1435# endif
1436#endif
1437
 1438 /** @todo investigate PWT, PCD and PAT. */
1439 /*
1440 * Make page table entry.
1441 */
1442 SHWPTE PteDst;
1443 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1444 {
1445 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1446 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1447 {
1448#if PGM_SHW_TYPE == PGM_TYPE_EPT
1449 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1450 PteDst.n.u1Present = 1;
1451 PteDst.n.u1Execute = 1;
1452 PteDst.n.u1IgnorePAT = 1;
1453 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1454 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1455#else
1456 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1457 | PGM_PAGE_GET_HCPHYS(pPage);
1458#endif
1459 }
1460 else
1461 {
1462 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1463 PteDst.u = 0;
1464 }
1465 /** @todo count these two kinds. */
1466 }
1467 else
1468 {
1469#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1470 /*
1471 * If the page or page directory entry is not marked accessed,
1472 * we mark the page not present.
1473 */
1474 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1475 {
 1476 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1477 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1478 PteDst.u = 0;
1479 }
1480 else
1481 /*
1482 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1483 * when the page is modified.
1484 */
1485 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1486 {
1487 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1488 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1489 | PGM_PAGE_GET_HCPHYS(pPage)
1490 | PGM_PTFLAGS_TRACK_DIRTY;
1491 }
1492 else
1493#endif
1494 {
1495 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1496#if PGM_SHW_TYPE == PGM_TYPE_EPT
1497 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1498 PteDst.n.u1Present = 1;
1499 PteDst.n.u1Write = 1;
1500 PteDst.n.u1Execute = 1;
1501 PteDst.n.u1IgnorePAT = 1;
1502 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1503 /* PteDst.n.u1Size = 0 */
1504#else
1505 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1506 | PGM_PAGE_GET_HCPHYS(pPage);
1507#endif
1508 }
1509 }
1510
1511#ifdef VBOX_WITH_NEW_PHYS_CODE
1512 /*
1513 * Make sure only allocated pages are mapped writable.
1514 */
1515 if ( PteDst.n.u1Write
1516 && PteDst.n.u1Present
1517 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1518 {
1519 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1520 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1521 }
1522#endif
1523
1524#ifdef PGMPOOL_WITH_USER_TRACKING
1525 /*
1526 * Keep user track up to date.
1527 */
1528 if (PteDst.n.u1Present)
1529 {
1530 if (!pPteDst->n.u1Present)
1531 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1532 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1533 {
1534 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1535 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1536 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1537 }
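 /* else: the shadow PTE still points at the same physical page, so the existing tracking reference remains valid. */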
1538 }
1539 else if (pPteDst->n.u1Present)
1540 {
1541 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1542 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1543 }
1544#endif /* PGMPOOL_WITH_USER_TRACKING */
1545
1546 /*
1547 * Update statistics and commit the entry.
1548 */
1549#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1550 if (!PteSrc.n.u1Global)
1551 pShwPage->fSeenNonGlobal = true;
1552#endif
1553 *pPteDst = PteDst;
1554 }
1555 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1556 /** @todo count these. */
1557 }
1558 else
1559 {
1560 /*
1561 * Page not-present.
1562 */
1563 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1564#ifdef PGMPOOL_WITH_USER_TRACKING
1565 /* Keep user track up to date. */
1566 if (pPteDst->n.u1Present)
1567 {
1568 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1569 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1570 }
1571#endif /* PGMPOOL_WITH_USER_TRACKING */
1572 pPteDst->u = 0;
1573 /** @todo count these. */
1574 }
1575}
1576
1577
1578/**
1579 * Syncs a guest OS page.
1580 *
1581 * There are no conflicts at this point, neither is there any need for
1582 * page table allocations.
1583 *
1584 * @returns VBox status code.
1585 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1586 * @param pVM VM handle.
1587 * @param PdeSrc Page directory entry of the guest.
1588 * @param GCPtrPage Guest context page address.
1589 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1590 * @param uErr Fault error (X86_TRAP_PF_*).
1591 */
1592PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1593{
1594 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1595
1596#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1597 || PGM_GST_TYPE == PGM_TYPE_PAE \
1598 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1599 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1600 && PGM_SHW_TYPE != PGM_TYPE_EPT
1601
1602# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1603 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1604# endif
1605
1606 /*
1607 * Assert preconditions.
1608 */
1609 Assert(PdeSrc.n.u1Present);
1610 Assert(cPages);
1611 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1612
1613 /*
1614 * Get the shadow PDE, find the shadow page table in the pool.
1615 */
1616# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1617 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1618 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
1619
1620 /* Fetch the pgm pool shadow descriptor. */
1621 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
1622 Assert(pShwPde);
1623
1624# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1625 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1626 PPGMPOOLPAGE pShwPde;
1627 PX86PDPAE pPDDst;
1628
1629 /* Fetch the pgm pool shadow descriptor. */
1630 int rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
1631 AssertRCSuccessReturn(rc, rc);
1632 Assert(pShwPde);
1633
1634 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1635 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1636
1637# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1638 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1639 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1640 PX86PDPAE pPDDst;
1641 PX86PDPT pPdptDst;
1642
1643 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1644 AssertRCSuccessReturn(rc, rc);
1645 Assert(pPDDst && pPdptDst);
1646 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1647# endif
1648
1649 SHWPDE PdeDst = *pPdeDst;
1650 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1651 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1652
1653# if PGM_GST_TYPE == PGM_TYPE_AMD64
1654 /* Fetch the pgm pool shadow descriptor. */
1655 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1656 Assert(pShwPde);
1657# endif
1658
1659# if defined(IN_RC)
1660 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1661 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1662# endif
1663
1664 /*
1665 * Check that the page is present and that the shadow PDE isn't out of sync.
1666 */
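 /* Big pages are only honoured when the guest PDE has PS set and, for 32-bit/PAE guests, CR4.PSE is enabled; long mode always accepts PS. */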
1667# if PGM_GST_TYPE == PGM_TYPE_AMD64
1668 const bool fBigPage = PdeSrc.b.u1Size;
1669# else
1670 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1671# endif
1672 RTGCPHYS GCPhys;
1673 if (!fBigPage)
1674 {
1675 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1676# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1677 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1678 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1679# endif
1680 }
1681 else
1682 {
1683 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1684# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1685 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1686 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1687# endif
1688 }
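 /* The shadow PDE is considered in sync when it maps the same GCPhys and its U/S, R/W and (when NX is valid) NX bits are consistent with the guest PDE. */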
1689 if ( pShwPage->GCPhys == GCPhys
1690 && PdeSrc.n.u1Present
1691 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1692 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1693# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1694 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1695# endif
1696 )
1697 {
1698 /*
1699 * Check that the PDE is marked accessed already.
1700 * Since we set the accessed bit *before* getting here on a #PF, this
1701 * check is only meant for dealing with non-#PF'ing paths.
1702 */
1703 if (PdeSrc.n.u1Accessed)
1704 {
1705 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1706 if (!fBigPage)
1707 {
1708 /*
1709 * 4KB Page - Map the guest page table.
1710 */
1711 PGSTPT pPTSrc;
1712 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1713 if (RT_SUCCESS(rc))
1714 {
1715# ifdef PGM_SYNC_N_PAGES
1716 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1717 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1718 {
1719 /*
1720 * This code path is currently only taken when the caller is PGMTrap0eHandler
1721 * for non-present pages!
1722 *
1723 * We sync up to PGM_SYNC_NR_PAGES pages around the faulting page to take
1724 * advantage of locality.
1725 */
1726 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1727# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1728 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1729 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1730# else
1731 const unsigned offPTSrc = 0;
1732# endif
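 /* Center a window of at most PGM_SYNC_NR_PAGES entries on the faulting page, clamped to the page table bounds (e.g. assuming PGM_SYNC_NR_PAGES is 8, a fault at entry 2 would sync entries 0 thru 5). */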
1733 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1734 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1735 iPTDst = 0;
1736 else
1737 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1738 for (; iPTDst < iPTDstEnd; iPTDst++)
1739 {
1740 if (!pPTDst->a[iPTDst].n.u1Present)
1741 {
1742 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1743 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1744 NOREF(GCPtrCurPage);
1745#ifndef IN_RING0
1746 /*
1747 * Assuming kernel code will be marked as supervisor (and not as user level
1748 * code executed via a conforming code selector) and as read-only.
1749 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1750 */
1751 PPGMPAGE pPage;
1752 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1753 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1754 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1755 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1756 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1757 )
1758#endif /* else: CSAM not active */
1759 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1760 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1761 GCPtrCurPage, PteSrc.n.u1Present,
1762 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1763 PteSrc.n.u1User & PdeSrc.n.u1User,
1764 (uint64_t)PteSrc.u,
1765 (uint64_t)pPTDst->a[iPTDst].u,
1766 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1767 }
1768 }
1769 }
1770 else
1771# endif /* PGM_SYNC_N_PAGES */
1772 {
1773 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1774 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1775 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1776 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1777 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1778 GCPtrPage, PteSrc.n.u1Present,
1779 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1780 PteSrc.n.u1User & PdeSrc.n.u1User,
1781 (uint64_t)PteSrc.u,
1782 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1783 }
1784 }
1785 else /* MMIO or invalid page: emulated in #PF handler. */
1786 {
1787 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1788 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1789 }
1790 }
1791 else
1792 {
1793 /*
1794 * 4/2MB page - lazy syncing shadow 4K pages.
1795 * (There are many ways of getting here; it's no longer only CSAM.)
1796 */
1797 /* Calculate the GC physical address of this 4KB shadow page. */
1798 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1799 /* Find ram range. */
1800 PPGMPAGE pPage;
1801 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1802 if (RT_SUCCESS(rc))
1803 {
1804# ifdef VBOX_WITH_NEW_PHYS_CODE
1805# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1806 /* Try make the page writable if necessary. */
1807 if ( PdeSrc.n.u1Write
1808 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1809 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1810 {
1811 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1812 AssertRC(rc);
1813 }
1814# endif
1815# endif
1816
1817 /*
1818 * Make shadow PTE entry.
1819 */
1820 SHWPTE PteDst;
1821 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1822 | PGM_PAGE_GET_HCPHYS(pPage);
1823 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1824 {
1825 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1826 PteDst.n.u1Write = 0;
1827 else
1828 PteDst.u = 0;
1829 }
1830 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1831# ifdef PGMPOOL_WITH_USER_TRACKING
1832 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1833 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1834# endif
1835# ifdef VBOX_WITH_NEW_PHYS_CODE
1836 /* Make sure only allocated pages are mapped writable. */
1837 if ( PteDst.n.u1Write
1838 && PteDst.n.u1Present
1839 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1840 {
1841 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1842 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1843 }
1844# endif
1845
1846 pPTDst->a[iPTDst] = PteDst;
1847
1848
1849 /*
1850 * If the page is not flagged as dirty and is writable, then make it read-only
1851 * at PD level, so we can set the dirty bit when the page is modified.
1852 *
1853 * ASSUMES that page access handlers are implemented on page table entry level.
1854 * Thus we will first catch the dirty access and set PDE.D and restart. If
1855 * there is an access handler, we'll trap again and let it work on the problem.
1856 */
1857 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1858 * As for invlpg, it simply frees the whole shadow PT.
1859 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1860 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1861 {
1862 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1863 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1864 PdeDst.n.u1Write = 0;
1865 }
1866 else
1867 {
1868 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1869 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1870 }
1871 *pPdeDst = PdeDst;
1872 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1873 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1874 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1875 }
1876 else
1877 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1878 }
1879# if defined(IN_RC)
1880 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1881 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1882# endif
1883 return VINF_SUCCESS;
1884 }
1885 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1886 }
1887 else
1888 {
1889 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1890 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1891 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1892 }
1893
1894 /*
1895 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1896 * Yea, I'm lazy.
1897 */
1898 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1899 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1900
1901 pPdeDst->u = 0;
1902
1903# if defined(IN_RC)
1904 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1905 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1906# endif
1907 PGM_INVL_GUEST_TLBS();
1908 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1909
1910#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1911 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1912 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1913 && !defined(IN_RC)
1914
1915# ifdef PGM_SYNC_N_PAGES
1916 /*
1917 * Get the shadow PDE, find the shadow page table in the pool.
1918 */
1919# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1920 X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
1921
1922# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1923 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
1924
1925# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1926 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1927 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1928 PX86PDPAE pPDDst;
1929 X86PDEPAE PdeDst;
1930 PX86PDPT pPdptDst;
1931
1932 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1933 AssertRCSuccessReturn(rc, rc);
1934 Assert(pPDDst && pPdptDst);
1935 PdeDst = pPDDst->a[iPDDst];
1936# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1937 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1938 PEPTPD pPDDst;
1939 EPTPDE PdeDst;
1940
1941 int rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, NULL, &pPDDst);
1942 if (rc != VINF_SUCCESS)
1943 {
1944 AssertRC(rc);
1945 return rc;
1946 }
1947 Assert(pPDDst);
1948 PdeDst = pPDDst->a[iPDDst];
1949# endif
1950 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
1951 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1952 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1953
1954 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1955 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1956 {
1957 /*
1958 * This code path is currently only taken when the caller is PGMTrap0eHandler
1959 * for non-present pages!
1960 *
1961 * We sync up to PGM_SYNC_NR_PAGES pages around the faulting page to take
1962 * advantage of locality.
1963 */
1964 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
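 /* Same windowing as in the paged-guest path above: sync up to PGM_SYNC_NR_PAGES entries around the faulting one, clamped to the page table bounds. */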
1965 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1966 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1967 iPTDst = 0;
1968 else
1969 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1970 for (; iPTDst < iPTDstEnd; iPTDst++)
1971 {
1972 if (!pPTDst->a[iPTDst].n.u1Present)
1973 {
1974 GSTPTE PteSrc;
1975
1976 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1977
1978 /* Fake the page table entry */
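 /* No guest paging in this mode, so fake an identity-mapped PTE with full access and the A/D bits set. */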
1979 PteSrc.u = GCPtrCurPage;
1980 PteSrc.n.u1Present = 1;
1981 PteSrc.n.u1Dirty = 1;
1982 PteSrc.n.u1Accessed = 1;
1983 PteSrc.n.u1Write = 1;
1984 PteSrc.n.u1User = 1;
1985
1986 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1987
1988 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1989 GCPtrCurPage, PteSrc.n.u1Present,
1990 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1991 PteSrc.n.u1User & PdeSrc.n.u1User,
1992 (uint64_t)PteSrc.u,
1993 (uint64_t)pPTDst->a[iPTDst].u,
1994 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1995 }
1996 else
1997 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
1998 }
1999 }
2000 else
2001# endif /* PGM_SYNC_N_PAGES */
2002 {
2003 GSTPTE PteSrc;
2004 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2005 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2006
2007 /* Fake the page table entry */
2008 PteSrc.u = GCPtrCurPage;
2009 PteSrc.n.u1Present = 1;
2010 PteSrc.n.u1Dirty = 1;
2011 PteSrc.n.u1Accessed = 1;
2012 PteSrc.n.u1Write = 1;
2013 PteSrc.n.u1User = 1;
2014 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2015
2016 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2017 GCPtrPage, PteSrc.n.u1Present,
2018 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2019 PteSrc.n.u1User & PdeSrc.n.u1User,
2020 (uint64_t)PteSrc.u,
2021 (uint64_t)pPTDst->a[iPTDst].u,
2022 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2023 }
2024 return VINF_SUCCESS;
2025
2026#else
2027 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2028 return VERR_INTERNAL_ERROR;
2029#endif
2030}
2031
2032
2033#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2034/**
2035 * Investigate page fault and handle write protection page faults caused by
2036 * dirty bit tracking.
2037 *
2038 * @returns VBox status code.
2039 * @param pVM VM handle.
2040 * @param uErr Page fault error code.
2041 * @param pPdeDst Shadow page directory entry.
2042 * @param pPdeSrc Guest page directory entry.
2043 * @param GCPtrPage Guest context page address.
2044 */
2045PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2046{
2047 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
2048 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2049 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
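 /* Note: when CR0.WP is clear, supervisor writes ignore the R/W bit, so a write to a read-only entry only counts as a real fault for user accesses or when WP is set (see the checks below). */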
2050# if PGM_GST_TYPE == PGM_TYPE_AMD64
2051 bool fBigPagesSupported = true;
2052# else
2053 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2054# endif
2055# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2056 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
2057# endif
2058 unsigned uPageFaultLevel;
2059 int rc;
2060
2061 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2062 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2063
2064# if PGM_GST_TYPE == PGM_TYPE_PAE \
2065 || PGM_GST_TYPE == PGM_TYPE_AMD64
2066
2067# if PGM_GST_TYPE == PGM_TYPE_AMD64
2068 PX86PML4E pPml4eSrc;
2069 PX86PDPE pPdpeSrc;
2070
2071 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
2072 Assert(pPml4eSrc);
2073
2074 /*
2075 * Real page fault? (PML4E level)
2076 */
2077 if ( (uErr & X86_TRAP_PF_RSVD)
2078 || !pPml4eSrc->n.u1Present
2079 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2080 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2081 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2082 )
2083 {
2084 uPageFaultLevel = 0;
2085 goto l_UpperLevelPageFault;
2086 }
2087 Assert(pPdpeSrc);
2088
2089# else /* PAE */
2090 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVM->pgm.s, GCPtrPage);
2091# endif /* PAE */
2092
2093 /*
2094 * Real page fault? (PDPE level)
2095 */
2096 if ( (uErr & X86_TRAP_PF_RSVD)
2097 || !pPdpeSrc->n.u1Present
2098# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2099 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2100 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2101 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2102# endif
2103 )
2104 {
2105 uPageFaultLevel = 1;
2106 goto l_UpperLevelPageFault;
2107 }
2108# endif
2109
2110 /*
2111 * Real page fault? (PDE level)
2112 */
2113 if ( (uErr & X86_TRAP_PF_RSVD)
2114 || !pPdeSrc->n.u1Present
2115# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2116 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2117# endif
2118 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2119 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2120 {
2121 uPageFaultLevel = 2;
2122 goto l_UpperLevelPageFault;
2123 }
2124
2125 /*
2126 * First check the easy case where the page directory has been marked read-only to track
2127 * the dirty bit of an emulated BIG page.
2128 */
2129 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2130 {
2131 /* Mark guest page directory as accessed */
2132# if PGM_GST_TYPE == PGM_TYPE_AMD64
2133 pPml4eSrc->n.u1Accessed = 1;
2134 pPdpeSrc->lm.u1Accessed = 1;
2135# endif
2136 pPdeSrc->b.u1Accessed = 1;
2137
2138 /*
2139 * Only write protection page faults are relevant here.
2140 */
2141 if (fWriteFault)
2142 {
2143 /* Mark guest page directory as dirty (BIG page only). */
2144 pPdeSrc->b.u1Dirty = 1;
2145
2146 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2147 {
2148 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2149
2150 Assert(pPdeSrc->b.u1Write);
2151
2152 pPdeDst->n.u1Write = 1;
2153 pPdeDst->n.u1Accessed = 1;
2154 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2155 PGM_INVL_BIG_PG(GCPtrPage);
2156 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2157 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2158 }
2159 }
2160 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2161 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2162 }
2163 /* else: 4KB page table */
2164
2165 /*
2166 * Map the guest page table.
2167 */
2168 PGSTPT pPTSrc;
2169 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2170 if (RT_SUCCESS(rc))
2171 {
2172 /*
2173 * Real page fault?
2174 */
2175 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2176 const GSTPTE PteSrc = *pPteSrc;
2177 if ( !PteSrc.n.u1Present
2178# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2179 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2180# endif
2181 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2182 || (fUserLevelFault && !PteSrc.n.u1User)
2183 )
2184 {
2185 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2186 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2187 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2188
2189 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2190 * See the 2nd case above as well.
2191 */
2192 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2193 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2194
2195 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2196 return VINF_EM_RAW_GUEST_TRAP;
2197 }
2198 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2199
2200 /*
2201 * Set the accessed bits in the page directory and the page table.
2202 */
2203# if PGM_GST_TYPE == PGM_TYPE_AMD64
2204 pPml4eSrc->n.u1Accessed = 1;
2205 pPdpeSrc->lm.u1Accessed = 1;
2206# endif
2207 pPdeSrc->n.u1Accessed = 1;
2208 pPteSrc->n.u1Accessed = 1;
2209
2210 /*
2211 * Only write protection page faults are relevant here.
2212 */
2213 if (fWriteFault)
2214 {
2215 /* Write access, so mark guest entry as dirty. */
2216# ifdef VBOX_WITH_STATISTICS
2217 if (!pPteSrc->n.u1Dirty)
2218 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2219 else
2220 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2221# endif
2222
2223 pPteSrc->n.u1Dirty = 1;
2224
2225 if (pPdeDst->n.u1Present)
2226 {
2227#ifndef IN_RING0
2228 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2229 * Our individual shadow handlers will provide more information and force a fatal exit.
2230 */
2231 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2232 {
2233 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2234 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2235 return VINF_SUCCESS;
2236 }
2237#endif
2238 /*
2239 * Map shadow page table.
2240 */
2241 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2242 if (pShwPage)
2243 {
2244 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2245 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
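 /* PGM_PTFLAGS_TRACK_DIRTY marks a shadow PTE we write-protected solely to emulate the guest dirty bit; if it is set, restore write access, set D/A, and let the guest retry. */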
2246 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2247 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2248 {
2249 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2250# ifdef VBOX_STRICT
2251 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2252 if (pPage)
2253 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2254 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2255# endif
2256 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2257
2258 Assert(pPteSrc->n.u1Write);
2259
2260 pPteDst->n.u1Write = 1;
2261 pPteDst->n.u1Dirty = 1;
2262 pPteDst->n.u1Accessed = 1;
2263 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2264 PGM_INVL_PG(GCPtrPage);
2265
2266 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2267 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2268 }
2269 }
2270 else
2271 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2272 }
2273 }
2274/** @todo Optimize accessed bit emulation? */
2275# ifdef VBOX_STRICT
2276 /*
2277 * Sanity check.
2278 */
2279 else if ( !pPteSrc->n.u1Dirty
2280 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2281 && pPdeDst->n.u1Present)
2282 {
2283 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2284 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2285 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2286 if ( pPteDst->n.u1Present
2287 && pPteDst->n.u1Write)
2288 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2289 }
2290# endif /* VBOX_STRICT */
2291 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2292 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2293 }
2294 AssertRC(rc);
2295 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2296 return rc;
2297
2298
2299l_UpperLevelPageFault:
2300 /*
2301 * Pagefault detected while checking the PML4E, PDPE or PDE.
2302 * Single exit handler to get rid of duplicate code paths.
2303 */
2304 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2305 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2306 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2307
2308 if (
2309# if PGM_GST_TYPE == PGM_TYPE_AMD64
2310 pPml4eSrc->n.u1Present &&
2311# endif
2312# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2313 pPdpeSrc->n.u1Present &&
2314# endif
2315 pPdeSrc->n.u1Present)
2316 {
2317 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2318 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2319 {
2320 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2321 }
2322 else
2323 {
2324 /*
2325 * Map the guest page table.
2326 */
2327 PGSTPT pPTSrc;
2328 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2329 if (RT_SUCCESS(rc))
2330 {
2331 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2332 const GSTPTE PteSrc = *pPteSrc;
2333 if (pPteSrc->n.u1Present)
2334 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2335 }
2336 AssertRC(rc);
2337 }
2338 }
2339 return VINF_EM_RAW_GUEST_TRAP;
2340}
2341#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2342
2343
2344/**
2345 * Sync a shadow page table.
2346 *
2347 * Called when the shadow page table is not present. This includes the
2348 * case where there is a conflict with a mapping.
2349 *
2350 * @returns VBox status code.
2351 * @param pVM VM handle.
2352 * @param iPDSrc Guest page directory index.
2353 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2354 * Assume this is a temporary mapping.
2355 * @param GCPtrPage GC Pointer of the page that caused the fault.
2356 */
2357PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2358{
2359 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2360 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPtPD[iPDSrc]);
2361 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2362
2363#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2364 || PGM_GST_TYPE == PGM_TYPE_PAE \
2365 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2366 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2367 && PGM_SHW_TYPE != PGM_TYPE_EPT
2368
2369 int rc = VINF_SUCCESS;
2370
2371 /*
2372 * Validate input a little bit.
2373 */
2374 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2375# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2376 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2377 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2378
2379 /* Fetch the pgm pool shadow descriptor. */
2380 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2381 Assert(pShwPde);
2382
2383# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2384 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2385 PPGMPOOLPAGE pShwPde;
2386 PX86PDPAE pPDDst;
2387 PSHWPDE pPdeDst;
2388
2389 /* Fetch the pgm pool shadow descriptor. */
2390 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2391 AssertRCSuccessReturn(rc, rc);
2392 Assert(pShwPde);
2393
2394 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2395 pPdeDst = &pPDDst->a[iPDDst];
2396
2397# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2398 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2399 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2400 PX86PDPAE pPDDst;
2401 PX86PDPT pPdptDst;
2402 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2403 AssertRCSuccessReturn(rc, rc);
2404 Assert(pPDDst);
2405 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2406# endif
2407 SHWPDE PdeDst = *pPdeDst;
2408
2409# if PGM_GST_TYPE == PGM_TYPE_AMD64
2410 /* Fetch the pgm pool shadow descriptor. */
2411 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2412 Assert(pShwPde);
2413# endif
2414
2415# ifndef PGM_WITHOUT_MAPPINGS
2416 /*
2417 * Check for conflicts.
2418 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2419 * HC: Simply resolve the conflict.
2420 */
2421 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2422 {
2423 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2424# ifndef IN_RING3
2425 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2426 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2427 return VERR_ADDRESS_CONFLICT;
2428# else
2429 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2430 Assert(pMapping);
2431# if PGM_GST_TYPE == PGM_TYPE_32BIT
2432 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2433# elif PGM_GST_TYPE == PGM_TYPE_PAE
2434 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2435# else
2436 AssertFailed(); /* can't happen for amd64 */
2437# endif
2438 if (RT_FAILURE(rc))
2439 {
2440 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2441 return rc;
2442 }
2443 PdeDst = *pPdeDst;
2444# endif
2445 }
2446# else /* PGM_WITHOUT_MAPPINGS */
2447 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2448# endif /* PGM_WITHOUT_MAPPINGS */
2449 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2450
2451# if defined(IN_RC)
2452 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2453 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2454# endif
2455
2456 /*
2457 * Sync page directory entry.
2458 */
2459 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2460 if (PdeSrc.n.u1Present)
2461 {
2462 /*
2463 * Allocate & map the page table.
2464 */
2465 PSHWPT pPTDst;
2466# if PGM_GST_TYPE == PGM_TYPE_AMD64
2467 const bool fPageTable = !PdeSrc.b.u1Size;
2468# else
2469 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2470# endif
2471 PPGMPOOLPAGE pShwPage;
2472 RTGCPHYS GCPhys;
2473 if (fPageTable)
2474 {
2475 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2476# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2477 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2478 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2479# endif
2480 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2481 }
2482 else
2483 {
2484 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2485# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2486 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2487 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2488# endif
2489 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2490 }
2491 if (rc == VINF_SUCCESS)
2492 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2493 else if (rc == VINF_PGM_CACHED_PAGE)
2494 {
2495 /*
2496 * The PT was cached, just hook it up.
2497 */
2498 if (fPageTable)
2499 PdeDst.u = pShwPage->Core.Key
2500 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2501 else
2502 {
2503 PdeDst.u = pShwPage->Core.Key
2504 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2505 /* (see explanation and assumptions further down.) */
2506 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2507 {
2508 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2509 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2510 PdeDst.b.u1Write = 0;
2511 }
2512 }
2513 *pPdeDst = PdeDst;
2514# if defined(IN_RC)
2515 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2516# endif
2517 return VINF_SUCCESS;
2518 }
2519 else if (rc == VERR_PGM_POOL_FLUSHED)
2520 {
2521 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
2522# if defined(IN_RC)
2523 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2524# endif
2525 return VINF_PGM_SYNC_CR3;
2526 }
2527 else
2528 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
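 /* Point the shadow PDE at the newly allocated shadow page table, keeping only the available-to-software bits (pShwPage->Core.Key is the table's host physical address). */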
2529 PdeDst.u &= X86_PDE_AVL_MASK;
2530 PdeDst.u |= pShwPage->Core.Key;
2531
2532 /*
2533 * Page directory has been accessed (this is a fault situation, remember).
2534 */
2535 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2536 if (fPageTable)
2537 {
2538 /*
2539 * Page table - 4KB.
2540 *
2541 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2542 */
2543 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2544 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2545 PGSTPT pPTSrc;
2546 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2547 if (RT_SUCCESS(rc))
2548 {
2549 /*
2550 * Start by syncing the page directory entry so CSAM's TLB trick works.
2551 */
2552 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2553 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2554 *pPdeDst = PdeDst;
2555# if defined(IN_RC)
2556 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2557# endif
2558
2559 /*
2560 * Directory/page user or supervisor privilege: (same goes for read/write)
2561 *
2562 * Directory Page Combined
2563 * U/S U/S U/S
2564 * 0 0 0
2565 * 0 1 0
2566 * 1 0 0
2567 * 1 1 1
2568 *
2569 * Simple AND operation. Table listed for completeness.
2570 *
2571 */
2572 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2573# ifdef PGM_SYNC_N_PAGES
2574 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2575 unsigned iPTDst = iPTBase;
2576 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2577 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2578 iPTDst = 0;
2579 else
2580 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2581# else /* !PGM_SYNC_N_PAGES */
2582 unsigned iPTDst = 0;
2583 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2584# endif /* !PGM_SYNC_N_PAGES */
2585# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2586 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2587 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2588# else
2589 const unsigned offPTSrc = 0;
2590# endif
2591 for (; iPTDst < iPTDstEnd; iPTDst++)
2592 {
2593 const unsigned iPTSrc = iPTDst + offPTSrc;
2594 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2595
2596 if (PteSrc.n.u1Present) /* we've already cleared it above */
2597 {
2598# ifndef IN_RING0
2599 /*
2600 * Assuming kernel code will be marked as supervisor (and not as user level
2601 * code executed via a conforming code selector) and as read-only.
2602 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2603 */
2604 PPGMPAGE pPage;
2605 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2606 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2607 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2608 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2609 )
2610# endif
2611 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2612 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2613 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2614 PteSrc.n.u1Present,
2615 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2616 PteSrc.n.u1User & PdeSrc.n.u1User,
2617 (uint64_t)PteSrc.u,
2618 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2619 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2620 }
2621 } /* for PTEs */
2622 }
2623 }
2624 else
2625 {
2626 /*
2627 * Big page - 2/4MB.
2628 *
2629 * We'll walk the ram range list in parallel and optimize lookups.
2630 * We will only sync one shadow page table at a time.
2631 */
2632 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2633
2634 /**
2635 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2636 */
2637
2638 /*
2639 * Start by syncing the page directory entry.
2640 */
2641 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2642 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2643
2644 /*
2645 * If the page is not flagged as dirty and is writable, then make it read-only
2646 * at PD level, so we can set the dirty bit when the page is modified.
2647 *
2648 * ASSUMES that page access handlers are implemented on page table entry level.
2649 * Thus we will first catch the dirty access and set PDE.D and restart. If
2650 * there is an access handler, we'll trap again and let it work on the problem.
2651 */
2652 /** @todo move the above stuff to a section in the PGM documentation. */
2653 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2654 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2655 {
2656 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2657 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2658 PdeDst.b.u1Write = 0;
2659 }
2660 *pPdeDst = PdeDst;
2661# if defined(IN_RC)
2662 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2663# endif
2664
2665 /*
2666 * Fill the shadow page table.
2667 */
2668 /* Get address and flags from the source PDE. */
2669 SHWPTE PteDstBase;
2670 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2671
2672 /* Loop thru the entries in the shadow PT. */
2673 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2674 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2675 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2676 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2677 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2678 unsigned iPTDst = 0;
2679 while (iPTDst < RT_ELEMENTS(pPTDst->a))
2680 {
2681 /* Advance ram range list. */
2682 while (pRam && GCPhys > pRam->GCPhysLast)
2683 pRam = pRam->CTX_SUFF(pNext);
2684 if (pRam && GCPhys >= pRam->GCPhys)
2685 {
2686 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2687 do
2688 {
2689 /* Make shadow PTE. */
2690 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2691 SHWPTE PteDst;
2692
2693# ifdef VBOX_WITH_NEW_PHYS_CODE
2694# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2695 /* Try make the page writable if necessary. */
2696 if ( PteDstBase.n.u1Write
2697 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2698 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2699 {
2700 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2701 AssertRCReturn(rc, rc);
2702 }
2703# endif
2704# else /* !VBOX_WITH_NEW_PHYS_CODE */
2705 /* Make sure the RAM has already been allocated. */
2706 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2707 {
2708 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2709 {
2710# ifdef IN_RING3
2711 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2712# else
2713 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2714# endif
2715 if (rc != VINF_SUCCESS)
2716 return rc;
2717 }
2718 }
2719# endif /* !VBOX_WITH_NEW_PHYS_CODE */
2720
2721 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2722 {
2723 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2724 {
2725 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2726 PteDst.n.u1Write = 0;
2727 }
2728 else
2729 PteDst.u = 0;
2730 }
2731# ifndef IN_RING0
2732 /*
2733 * Assuming kernel code will be marked as supervisor and not as user level and executed
2734 * using a conforming code selector. Don't check for readonly, as that implies the whole
2735 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2736 */
2737 else if ( !PdeSrc.n.u1User
2738 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2739 PteDst.u = 0;
2740# endif
2741 else
2742 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2743
2744# ifdef VBOX_WITH_NEW_PHYS_CODE
2745 /* Only map writable pages writable. */
2746 if ( PteDst.n.u1Write
2747 && PteDst.n.u1Present
2748 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2749 {
2750 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2751 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2752 }
2753# endif
2754
2755# ifdef PGMPOOL_WITH_USER_TRACKING
2756 if (PteDst.n.u1Present)
2757 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2758# endif
2759 /* commit it */
2760 pPTDst->a[iPTDst] = PteDst;
2761 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2762 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2763 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2764
2765 /* advance */
2766 GCPhys += PAGE_SIZE;
2767 iHCPage++;
2768 iPTDst++;
2769 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2770 && GCPhys <= pRam->GCPhysLast);
2771 }
2772 else if (pRam)
2773 {
2774 Log(("Invalid pages at %RGp\n", GCPhys));
2775 do
2776 {
2777 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2778 GCPhys += PAGE_SIZE;
2779 iPTDst++;
2780 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2781 && GCPhys < pRam->GCPhys);
2782 }
2783 else
2784 {
2785 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2786 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2787 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2788 }
2789 } /* while more PTEs */
2790 } /* 4KB / 4MB */
2791 }
2792 else
2793 AssertRelease(!PdeDst.n.u1Present);
2794
2795 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2796 if (RT_FAILURE(rc))
2797 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2798 return rc;
2799
2800#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2801 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2802 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2803 && !defined(IN_RC)
2804
2805 /*
2806 * Validate input a little bit.
2807 */
2808 int rc = VINF_SUCCESS;
2809# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2810 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2811 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2812
2813 /* Fetch the pgm pool shadow descriptor. */
2814 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2815 Assert(pShwPde);
2816
2817# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2818 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2819 PPGMPOOLPAGE pShwPde;
2820 PX86PDPAE pPDDst;
2821 PSHWPDE pPdeDst;
2822
2823 /* Fetch the pgm pool shadow descriptor. */
2824 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2825 AssertRCSuccessReturn(rc, rc);
2826 Assert(pShwPde);
2827
2828 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2829 pPdeDst = &pPDDst->a[iPDDst];
2830
2831# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2832 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2833 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2834 PX86PDPAE pPDDst;
2835 PX86PDPT pPdptDst;
2836 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2837 AssertRCSuccessReturn(rc, rc);
2838 Assert(pPDDst);
2839 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2840
2841 /* Fetch the pgm pool shadow descriptor. */
2842 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2843 Assert(pShwPde);
2844
2845# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2846 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2847 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2848 PEPTPD pPDDst;
2849 PEPTPDPT pPdptDst;
2850
2851 rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2852 if (rc != VINF_SUCCESS)
2853 {
2854 AssertRC(rc);
2855 return rc;
2856 }
2857 Assert(pPDDst);
2858 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2859
2860 /* Fetch the pgm pool shadow descriptor. */
2861 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2862 Assert(pShwPde);
2863# endif
2864 SHWPDE PdeDst = *pPdeDst;
2865
2866 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2867 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2868
2869 GSTPDE PdeSrc;
2870 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2871 PdeSrc.n.u1Present = 1;
2872 PdeSrc.n.u1Write = 1;
2873 PdeSrc.n.u1Accessed = 1;
2874 PdeSrc.n.u1User = 1;
2875
2876 /*
2877 * Allocate & map the page table.
2878 */
2879 PSHWPT pPTDst;
2880 PPGMPOOLPAGE pShwPage;
2881 RTGCPHYS GCPhys;
2882
2883 /* Virtual address = physical address */
2884 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
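 /* The shadow PT is allocated/cached by the guest physical address of the 2/4 MB region this PDE covers, hence the round-down in the call below. */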
2885 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2886
2887 if ( rc == VINF_SUCCESS
2888 || rc == VINF_PGM_CACHED_PAGE)
2889 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2890 else
2891 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2892
2893 PdeDst.u &= X86_PDE_AVL_MASK;
2894 PdeDst.u |= pShwPage->Core.Key;
2895 PdeDst.n.u1Present = 1;
2896 PdeDst.n.u1Write = 1;
2897# if PGM_SHW_TYPE == PGM_TYPE_EPT
2898 PdeDst.n.u1Execute = 1;
2899# else
2900 PdeDst.n.u1User = 1;
2901 PdeDst.n.u1Accessed = 1;
2902# endif
2903 *pPdeDst = PdeDst;
2904
2905 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2906 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2907 return rc;
2908
2909#else
2910 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2911 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2912 return VERR_INTERNAL_ERROR;
2913#endif
2914}
2915
2916
2917
2918/**
2919 * Prefetch a page/set of pages.
2920 *
2921 * Typically used to sync commonly used pages before entering raw mode
2922 * after a CR3 reload.
2923 *
2924 * @returns VBox status code.
2925 * @param pVM VM handle.
2927 * @param GCPtrPage Page to prefetch.
2927 */
2928PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage)
2929{
2930#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2931 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
2932 /*
2933 * Check that all Guest levels thru the PDE are present, getting the
2934 * PD and PDE in the process.
2935 */
2936 int rc = VINF_SUCCESS;
2937# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2938# if PGM_GST_TYPE == PGM_TYPE_32BIT
2939 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
2940 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
2941# elif PGM_GST_TYPE == PGM_TYPE_PAE
2942 unsigned iPDSrc;
2943 X86PDPE PdpeSrc;
2944 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
2945 if (!pPDSrc)
2946 return VINF_SUCCESS; /* not present */
2947# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2948 unsigned iPDSrc;
2949 PX86PML4E pPml4eSrc;
2950 X86PDPE PdpeSrc;
2951 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2952 if (!pPDSrc)
2953 return VINF_SUCCESS; /* not present */
2954# endif
2955 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2956# else
2957 PGSTPD pPDSrc = NULL;
2958 const unsigned iPDSrc = 0;
2959 GSTPDE PdeSrc;
2960
2961 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2962 PdeSrc.n.u1Present = 1;
2963 PdeSrc.n.u1Write = 1;
2964 PdeSrc.n.u1Accessed = 1;
2965 PdeSrc.n.u1User = 1;
2966# endif
2967
2968 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2969 {
2970# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2971 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
2972# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2973 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2974 PX86PDPAE pPDDst;
2975 X86PDEPAE PdeDst;
2976# if PGM_GST_TYPE != PGM_TYPE_PAE
2977 X86PDPE PdpeSrc;
2978
2979 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
2980 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
2981# endif
2982 int rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
2983 if (rc != VINF_SUCCESS)
2984 {
2985 AssertRC(rc);
2986 return rc;
2987 }
2988 Assert(pPDDst);
2989 PdeDst = pPDDst->a[iPDDst];
2990
2991# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2992 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2993 PX86PDPAE pPDDst;
2994 X86PDEPAE PdeDst;
2995
2996# if PGM_GST_TYPE == PGM_TYPE_PROT
2997 /* AMD-V nested paging */
2998 X86PML4E Pml4eSrc;
2999 X86PDPE PdpeSrc;
3000 PX86PML4E pPml4eSrc = &Pml4eSrc;
3001
3002 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3003 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3004 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3005# endif
3006
3007 int rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3008 if (rc != VINF_SUCCESS)
3009 {
3010 AssertRC(rc);
3011 return rc;
3012 }
3013 Assert(pPDDst);
3014 PdeDst = pPDDst->a[iPDDst];
3015# endif
3016 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3017 {
3018 if (!PdeDst.n.u1Present)
3019 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3020 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3021 else
3022 {
3023 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3024 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3025 * makes no sense to prefetch more than one page.
3026 */
3027 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3028 if (RT_SUCCESS(rc))
3029 rc = VINF_SUCCESS;
3030 }
3031 }
3032 }
3033 return rc;
3034
3035#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3036 return VINF_SUCCESS; /* ignore */
3037#endif
3038}
3039
3040
3041
3042
3043/**
3044 * Syncs a page during a PGMVerifyAccess() call.
3045 *
3046 * @returns VBox status code (informational included).
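 * @param pVM VM handle.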
3047 * @param GCPtrPage The address of the page to sync.
3048 * @param fPage The effective guest page flags.
3049 * @param uErr The trap error code.
3050 */
3051PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3052{
3053 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3054
3055 Assert(!HWACCMIsNestedPagingActive(pVM));
3056#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3057 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3058
3059# ifndef IN_RING0
3060 if (!(fPage & X86_PTE_US))
3061 {
3062 /*
3063 * Mark this page as safe.
3064 */
3065 /** @todo not correct for pages that contain both code and data!! */
3066 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3067 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3068 }
3069# endif
3070
3071 /*
3072 * Get guest PD and index.
3073 */
3074# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3075# if PGM_GST_TYPE == PGM_TYPE_32BIT
3076 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3077 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3078# elif PGM_GST_TYPE == PGM_TYPE_PAE
3079 unsigned iPDSrc;
3080 X86PDPE PdpeSrc;
3081 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3082
3083 if (!pPDSrc)
3084 {
3085 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3086 return VINF_EM_RAW_GUEST_TRAP;
3087 }
3088# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3089 unsigned iPDSrc;
3090 PX86PML4E pPml4eSrc;
3091 X86PDPE PdpeSrc;
3092 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3093 if (!pPDSrc)
3094 {
3095 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3096 return VINF_EM_RAW_GUEST_TRAP;
3097 }
3098# endif
3099# else
3100 PGSTPD pPDSrc = NULL;
3101 const unsigned iPDSrc = 0;
3102# endif
3103 int rc = VINF_SUCCESS;
3104
3105 /*
3106 * First check if the shadow pd is present.
3107 */
3108# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3109 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
3110# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3111 PX86PDEPAE pPdeDst;
3112 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3113 PX86PDPAE pPDDst;
3114# if PGM_GST_TYPE != PGM_TYPE_PAE
3115 X86PDPE PdpeSrc;
3116
3117 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3118 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3119# endif
3120 rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3121 if (rc != VINF_SUCCESS)
3122 {
3123 AssertRC(rc);
3124 return rc;
3125 }
3126 Assert(pPDDst);
3127 pPdeDst = &pPDDst->a[iPDDst];
3128
3129# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3130 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3131 PX86PDPAE pPDDst;
3132 PX86PDEPAE pPdeDst;
3133
3134# if PGM_GST_TYPE == PGM_TYPE_PROT
3135 /* AMD-V nested paging */
3136 X86PML4E Pml4eSrc;
3137 X86PDPE PdpeSrc;
3138 PX86PML4E pPml4eSrc = &Pml4eSrc;
3139
3140 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3141 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3142 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3143# endif
3144
3145 rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3146 if (rc != VINF_SUCCESS)
3147 {
3148 AssertRC(rc);
3149 return rc;
3150 }
3151 Assert(pPDDst);
3152 pPdeDst = &pPDDst->a[iPDDst];
3153# endif
3154
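 /* If the shadow PDE is not present yet, pull in the whole page table from the guest before checking the individual page. */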
3155 if (!pPdeDst->n.u1Present)
3156 {
3157# if defined(IN_RC)
3158 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3159 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3160# endif
3161 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3162# if defined(IN_RC)
3163 /* Release the dynamic pPdeDst mapping lock taken above; the mapping may be reused again from here on. */
3164 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3165# endif
3166 AssertRC(rc);
3167 if (rc != VINF_SUCCESS)
3168 return rc;
3169 }
3170
3171# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3172 /* Check for dirty bit fault */
3173 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3174 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3175 Log(("PGMVerifyAccess: success (dirty)\n"));
3176 else
3177 {
3178 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3179#else
3180 {
3181 GSTPDE PdeSrc;
3182 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3183 PdeSrc.n.u1Present = 1;
3184 PdeSrc.n.u1Write = 1;
3185 PdeSrc.n.u1Accessed = 1;
3186 PdeSrc.n.u1User = 1;
3187
3188#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3189 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3190 if (uErr & X86_TRAP_PF_US)
3191 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3192 else /* supervisor */
3193 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3194
3195 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3196 if (RT_SUCCESS(rc))
3197 {
3198 /* Page was successfully synced */
3199 Log2(("PGMVerifyAccess: success (sync)\n"));
3200 rc = VINF_SUCCESS;
3201 }
3202 else
3203 {
3204 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3205 return VINF_EM_RAW_GUEST_TRAP;
3206 }
3207 }
3208 return rc;
3209
3210#else /* PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT */
3211
3212 AssertReleaseMsgFailed(("Gst=%d Shw=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3213 return VERR_INTERNAL_ERROR;
3214#endif /* PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT */
3215}
3216
3217
3218#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3219# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3220/**
3221 * Figures out which kind of shadow page this guest PDE warrants.
3222 *
3223 * @returns Shadow page kind.
3224 * @param pPdeSrc The guest PDE in question.
3225 * @param cr4 The current guest cr4 value.
3226 */
3227DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3228{
3229# if PGM_GST_TYPE == PGM_TYPE_AMD64
3230 if (!pPdeSrc->n.u1Size)
3231# else
3232 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3233# endif
3234 return BTH_PGMPOOLKIND_PT_FOR_PT;
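 /* The finer-grained big-page kinds (the switch below) are currently disabled; every big page ends up as BTH_PGMPOOLKIND_PT_FOR_BIG. */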
3235 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3236 //{
3237 // case 0:
3238 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3239 // case X86_PDE4M_RW:
3240 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3241 // case X86_PDE4M_US:
3242 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3243 // case X86_PDE4M_RW | X86_PDE4M_US:
3244 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3245# if 0
3246 // case X86_PDE4M_PAE_NX:
3247 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3248 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3249 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3250 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3251 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3252 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3253 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3254# endif
3255 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3256 //}
3257}
3258# endif
3259#endif
3260
3261#undef MY_STAM_COUNTER_INC
3262#define MY_STAM_COUNTER_INC(a) do { } while (0)
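/* Redefined as a no-op; statistics counted via MY_STAM_COUNTER_INC are compiled out in this template. */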
3263
3264
3265/**
3266 * Syncs the paging hierarchy starting at CR3.
3267 *
3268 * @returns VBox status code, no specials.
3269 * @param pVM The virtual machine.
3270 * @param cr0 Guest context CR0 register
3271 * @param cr3 Guest context CR3 register
3272 * @param cr4 Guest context CR4 register
3273 * @param fGlobal Including global page directories or not
3274 */
3275PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3276{
3277 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3278 fGlobal = true; /* Change this CR3 reload to be a global one. */
3279
3280 LogFlow(("SyncCR3 %d\n", fGlobal));
3281
3282#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3283 /*
3284 * Update page access handlers.
3285 * Virtual handlers are always flushed, while physical handlers are only flushed on demand.
3286 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3287 * have to look into that later because it will have a bad influence on the performance.
3288 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3289 * bird: Yes, but that won't work for aliases.
3290 */
3291 /** @todo this MUST go away. See #1557. */
3292 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3293 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3294 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3295#endif
3296
3297#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3298 /*
3299 * Nested / EPT - almost no work.
3300 */
3301 /** @todo check if this is really necessary; the call does it as well... */
3302 HWACCMFlushTLB(pVM);
3303 return VINF_SUCCESS;
3304
3305#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3306 /*
3307 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3308 * out the shadow parts when the guest modifies its tables.
3309 */
3310 return VINF_SUCCESS;
3311
3312#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3313
3314# ifdef PGM_WITHOUT_MAPPINGS
3315 Assert(pVM->pgm.s.fMappingsFixed);
3316 return VINF_SUCCESS;
3317# else
3318 /* Nothing to do when mappings are fixed. */
3319 if (pVM->pgm.s.fMappingsFixed)
3320 return VINF_SUCCESS;
3321
3322 int rc = PGMMapResolveConflicts(pVM);
3323 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3324 if (rc == VINF_PGM_SYNC_CR3)
3325 {
3326 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3327 return VINF_PGM_SYNC_CR3;
3328 }
3329# endif
3330 return VINF_SUCCESS;
3331#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3332}
3333
3334
3335
3336
3337#ifdef VBOX_STRICT
3338#ifdef IN_RC
3339# undef AssertMsgFailed
3340# define AssertMsgFailed Log
3341#endif
3342#ifdef IN_RING3
3343# include <VBox/dbgf.h>
3344
3345/**
3346 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3347 *
3348 * @returns VBox status code (VINF_SUCCESS).
3349 * @param pVM The VM handle.
3350 * @param cr3 The root of the hierarchy.
3351 * @param cr4 The cr4 register value; only the PAE and PSE bits are currently used.
3352 * @param fLongMode Set if long mode, false if not long mode.
3353 * @param cMaxDepth Number of levels to dump.
3354 * @param pHlp Pointer to the output functions.
3355 */
3356__BEGIN_DECLS
3357VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3358__END_DECLS
3359
3360#endif
3361
3362/**
3363 * Checks that the shadow page table is in sync with the guest one.
3364 *
3365 * @returns The number of errors.
3366 * @param pVM The virtual machine.
3367 * @param cr3 Guest context CR3 register
3368 * @param cr4 Guest context CR4 register
3369 * @param GCPtr Where to start. Defaults to 0.
3370 * @param cb How much to check. Defaults to everything.
3371 */
3372PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3373{
3374#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3375 return 0;
3376#else
3377 unsigned cErrors = 0;
3378
3379#if PGM_GST_TYPE == PGM_TYPE_PAE
3380 /** @todo currently broken; crashes below somewhere */
3381 AssertFailed();
3382#endif
3383
3384#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3385 || PGM_GST_TYPE == PGM_TYPE_PAE \
3386 || PGM_GST_TYPE == PGM_TYPE_AMD64
3387
3388# if PGM_GST_TYPE == PGM_TYPE_AMD64
3389 bool fBigPagesSupported = true;
3390# else
3391 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3392# endif
3393 PPGM pPGM = &pVM->pgm.s;
3394 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3395 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3396# ifndef IN_RING0
3397 RTHCPHYS HCPhys; /* general usage. */
3398# endif
3399 int rc;
3400
3401 /*
3402 * Check that the Guest CR3 and all its mappings are correct.
3403 */
3404 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3405 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3406 false);
3407# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3408# if PGM_GST_TYPE == PGM_TYPE_32BIT
3409 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3410# else
3411 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3412# endif
3413 AssertRCReturn(rc, 1);
3414 HCPhys = NIL_RTHCPHYS;
3415 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3416 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3417# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3418 pgmGstGet32bitPDPtr(pPGM);
3419 RTGCPHYS GCPhys;
3420 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3421 AssertRCReturn(rc, 1);
3422 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3423# endif
3424# endif /* !IN_RING0 */
3425
3426 /*
3427 * Get and check the Shadow CR3.
3428 */
3429# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3430 unsigned cPDEs = X86_PG_ENTRIES;
3431 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3432# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3433# if PGM_GST_TYPE == PGM_TYPE_32BIT
3434 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3435# else
3436 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3437# endif
3438 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3439# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3440 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3441 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3442# endif
3443 if (cb != ~(RTGCPTR)0)
3444 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3445
3446/** @todo call the other two PGMAssert*() functions. */
3447
3448# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3449 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3450# endif
3451
3452# if PGM_GST_TYPE == PGM_TYPE_AMD64
3453 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3454
3455 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3456 {
3457 PPGMPOOLPAGE pShwPdpt = NULL;
3458 PX86PML4E pPml4eSrc;
3459 PX86PML4E pPml4eDst;
3460 RTGCPHYS GCPhysPdptSrc;
3461
3462 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3463 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3464
3465 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3466 if (!pPml4eDst->n.u1Present)
3467 {
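 /* Each PML4 entry covers 512 GiB (512 PDPTEs x 512 PDEs x 2 MiB); skip the whole range. */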
3468 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3469 continue;
3470 }
3471
3472 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3473 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3474
3475 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3476 {
3477 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3478 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3479 cErrors++;
3480 continue;
3481 }
3482
3483 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3484 {
3485 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3486 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3487 cErrors++;
3488 continue;
3489 }
3490
3491 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3492 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3493 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3494 {
3495 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3496 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3497 cErrors++;
3498 continue;
3499 }
3500# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3501 {
3502# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3503
3504# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3505 /*
3506 * Check the PDPTEs too.
3507 */
3508 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3509
3510 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3511 {
3512 unsigned iPDSrc;
3513 PPGMPOOLPAGE pShwPde = NULL;
3514 PX86PDPE pPdpeDst;
3515 RTGCPHYS GCPhysPdeSrc;
3516# if PGM_GST_TYPE == PGM_TYPE_PAE
3517 X86PDPE PdpeSrc;
3518 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3519 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3520# else
3521 PX86PML4E pPml4eSrc;
3522 X86PDPE PdpeSrc;
3523 PX86PDPT pPdptDst;
3524 PX86PDPAE pPDDst;
3525 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3526
3527 rc = pgmShwGetLongModePDPtr(pVM, GCPtr, NULL, &pPdptDst, &pPDDst);
3528 if (rc != VINF_SUCCESS)
3529 {
3530 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
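 /* A PDPT entry maps 512 * 2 MiB = 1 GiB; skip past it. */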
3531 GCPtr += 512 * _2M;
3532 continue; /* next PDPTE */
3533 }
3534 Assert(pPDDst);
3535# endif
3536 Assert(iPDSrc == 0);
3537
3538 pPdpeDst = &pPdptDst->a[iPdpt];
3539
3540 if (!pPdpeDst->n.u1Present)
3541 {
3542 GCPtr += 512 * _2M;
3543 continue; /* next PDPTE */
3544 }
3545
3546 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3547 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3548
3549 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3550 {
3551 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3552 GCPtr += 512 * _2M;
3553 cErrors++;
3554 continue;
3555 }
3556
3557 if (GCPhysPdeSrc != pShwPde->GCPhys)
3558 {
3559# if PGM_GST_TYPE == PGM_TYPE_AMD64
3560 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3561# else
3562 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3563# endif
3564 GCPtr += 512 * _2M;
3565 cErrors++;
3566 continue;
3567 }
3568
3569# if PGM_GST_TYPE == PGM_TYPE_AMD64
3570 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3571 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3572 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3573 {
3574 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3575 GCPtr += 512 * _2M;
3576 cErrors++;
3577 continue;
3578 }
3579# endif
3580
3581# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3582 {
3583# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3584# if PGM_GST_TYPE == PGM_TYPE_32BIT
3585 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3586# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3587 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
3588# endif
3589# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3590 /*
3591 * Iterate the shadow page directory.
3592 */
3593 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3594 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3595
3596 for (;
3597 iPDDst < cPDEs;
3598 iPDDst++, GCPtr += cIncrement)
3599 {
3600# if PGM_SHW_TYPE == PGM_TYPE_PAE
3601 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3602# else
3603 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3604# endif
3605 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3606 {
3607 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3608 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3609 {
3610 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3611 cErrors++;
3612 continue;
3613 }
3614 }
3615 else if ( (PdeDst.u & X86_PDE_P)
3616 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3617 )
3618 {
3619 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3620 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3621 if (!pPoolPage)
3622 {
3623 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3624 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3625 cErrors++;
3626 continue;
3627 }
3628 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3629
3630 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3631 {
3632 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3633 GCPtr, (uint64_t)PdeDst.u));
3634 cErrors++;
3635 }
3636
3637 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3638 {
3639 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3640 GCPtr, (uint64_t)PdeDst.u));
3641 cErrors++;
3642 }
3643
3644 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3645 if (!PdeSrc.n.u1Present)
3646 {
3647 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3648 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3649 cErrors++;
3650 continue;
3651 }
3652
3653 if ( !PdeSrc.b.u1Size
3654 || !fBigPagesSupported)
3655 {
3656 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3657# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
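 /* A 32-bit guest PT is split across two PAE shadow PTs; odd shadow PDEs refer to the second half (2 KB offset) of the guest PT. */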
3658 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3659# endif
3660 }
3661 else
3662 {
3663# if PGM_GST_TYPE == PGM_TYPE_32BIT
3664 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3665 {
3666 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3667 GCPtr, (uint64_t)PdeSrc.u));
3668 cErrors++;
3669 continue;
3670 }
3671# endif
3672 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3673# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3674 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3675# endif
3676 }
3677
3678 if ( pPoolPage->enmKind
3679 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3680 {
3681 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3682 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3683 cErrors++;
3684 }
3685
3686 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3687 if (!pPhysPage)
3688 {
3689 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3690 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3691 cErrors++;
3692 continue;
3693 }
3694
3695 if (GCPhysGst != pPoolPage->GCPhys)
3696 {
3697 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3698 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3699 cErrors++;
3700 continue;
3701 }
3702
3703 if ( !PdeSrc.b.u1Size
3704 || !fBigPagesSupported)
3705 {
3706 /*
3707 * Page Table.
3708 */
3709 const GSTPT *pPTSrc;
3710 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3711 if (RT_FAILURE(rc))
3712 {
3713 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3714 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3715 cErrors++;
3716 continue;
3717 }
3718 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3719 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3720 {
3721 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3722 // (This problem will go away when/if we shadow multiple CR3s.)
3723 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3724 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3725 cErrors++;
3726 continue;
3727 }
3728 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3729 {
3730 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3731 GCPtr, (uint64_t)PdeDst.u));
3732 cErrors++;
3733 continue;
3734 }
3735
3736 /* iterate the page table. */
3737# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3738 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3739 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3740# else
3741 const unsigned offPTSrc = 0;
3742# endif
3743 for (unsigned iPT = 0, off = 0;
3744 iPT < RT_ELEMENTS(pPTDst->a);
3745 iPT++, off += PAGE_SIZE)
3746 {
3747 const SHWPTE PteDst = pPTDst->a[iPT];
3748
3749 /* skip not-present entries. */
3750 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3751 continue;
3752 Assert(PteDst.n.u1Present);
3753
3754 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3755 if (!PteSrc.n.u1Present)
3756 {
3757# ifdef IN_RING3
3758 PGMAssertHandlerAndFlagsInSync(pVM);
3759 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3760# endif
3761 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3762 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3763 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3764 cErrors++;
3765 continue;
3766 }
3767
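 /* Bits that may legitimately differ between guest and shadow PTEs: physical address, AVL bits, global, dirty and the caching attributes. */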
3768 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3769# if 1 /** @todo sync accessed bit properly... */
3770 fIgnoreFlags |= X86_PTE_A;
3771# endif
3772
3773 /* match the physical addresses */
3774 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3775 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3776
3777# ifdef IN_RING3
3778 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3779 if (RT_FAILURE(rc))
3780 {
3781 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3782 {
3783 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3784 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3785 cErrors++;
3786 continue;
3787 }
3788 }
3789 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3790 {
3791 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3792 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3793 cErrors++;
3794 continue;
3795 }
3796# endif
3797
3798 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3799 if (!pPhysPage)
3800 {
3801# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3802 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3803 {
3804 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3805 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3806 cErrors++;
3807 continue;
3808 }
3809# endif
3810 if (PteDst.n.u1Write)
3811 {
3812 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3813 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3814 cErrors++;
3815 }
3816 fIgnoreFlags |= X86_PTE_RW;
3817 }
3818 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3819 {
3820 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3821 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3822 cErrors++;
3823 continue;
3824 }
3825
3826 /* flags */
3827 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3828 {
3829 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3830 {
3831 if (PteDst.n.u1Write)
3832 {
3833 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3834 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3835 cErrors++;
3836 continue;
3837 }
3838 fIgnoreFlags |= X86_PTE_RW;
3839 }
3840 else
3841 {
3842 if (PteDst.n.u1Present)
3843 {
3844 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3845 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3846 cErrors++;
3847 continue;
3848 }
3849 fIgnoreFlags |= X86_PTE_P;
3850 }
3851 }
3852 else
3853 {
3854 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3855 {
3856 if (PteDst.n.u1Write)
3857 {
3858 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3859 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3860 cErrors++;
3861 continue;
3862 }
3863 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3864 {
3865 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3866 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3867 cErrors++;
3868 continue;
3869 }
3870 if (PteDst.n.u1Dirty)
3871 {
3872 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3873 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3874 cErrors++;
3875 }
3876# if 0 /** @todo sync access bit properly... */
3877 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3878 {
3879 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3880 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3881 cErrors++;
3882 }
3883 fIgnoreFlags |= X86_PTE_RW;
3884# else
3885 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3886# endif
3887 }
3888 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3889 {
3890 /* access bit emulation (not implemented). */
3891 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3892 {
3893 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3894 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3895 cErrors++;
3896 continue;
3897 }
3898 if (!PteDst.n.u1Accessed)
3899 {
3900 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3901 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3902 cErrors++;
3903 }
3904 fIgnoreFlags |= X86_PTE_P;
3905 }
3906# ifdef DEBUG_sandervl
3907 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3908# endif
3909 }
3910
3911 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3912 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3913 )
3914 {
3915 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3916 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3917 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3918 cErrors++;
3919 continue;
3920 }
3921 } /* foreach PTE */
3922 }
3923 else
3924 {
3925 /*
3926 * Big Page.
3927 */
3928 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3929 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3930 {
3931 if (PdeDst.n.u1Write)
3932 {
3933 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3934 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3935 cErrors++;
3936 continue;
3937 }
3938 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3939 {
3940 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3941 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3942 cErrors++;
3943 continue;
3944 }
3945# if 0 /** @todo sync access bit properly... */
3946 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3947 {
3948 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3949 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3950 cErrors++;
3951 }
3952 fIgnoreFlags |= X86_PTE_RW;
3953# else
3954 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3955# endif
3956 }
3957 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3958 {
3959 /* access bit emulation (not implemented). */
3960 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3961 {
3962 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3963 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3964 cErrors++;
3965 continue;
3966 }
3967 if (!PdeDst.n.u1Accessed)
3968 {
3969 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3970 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3971 cErrors++;
3972 }
3973 fIgnoreFlags |= X86_PTE_P;
3974 }
3975
3976 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3977 {
3978 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3979 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3980 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3981 cErrors++;
3982 }
3983
3984 /* iterate the page table. */
3985 for (unsigned iPT = 0, off = 0;
3986 iPT < RT_ELEMENTS(pPTDst->a);
3987 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3988 {
3989 const SHWPTE PteDst = pPTDst->a[iPT];
3990
3991 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3992 {
3993 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3994 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3995 cErrors++;
3996 }
3997
3998 /* skip not-present entries. */
3999 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4000 continue;
4001
4002 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4003
4004 /* match the physical addresses */
4005 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4006
4007# ifdef IN_RING3
4008 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4009 if (RT_FAILURE(rc))
4010 {
4011 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4012 {
4013 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4014 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4015 cErrors++;
4016 }
4017 }
4018 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4019 {
4020 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4021 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4022 cErrors++;
4023 continue;
4024 }
4025# endif
4026 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4027 if (!pPhysPage)
4028 {
4029# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4030 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4031 {
4032 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4033 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4034 cErrors++;
4035 continue;
4036 }
4037# endif
4038 if (PteDst.n.u1Write)
4039 {
4040 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4041 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4042 cErrors++;
4043 }
4044 fIgnoreFlags |= X86_PTE_RW;
4045 }
4046 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4047 {
4048 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4049 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4050 cErrors++;
4051 continue;
4052 }
4053
4054 /* flags */
4055 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4056 {
4057 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4058 {
4059 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4060 {
4061 if (PteDst.n.u1Write)
4062 {
4063 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4064 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4065 cErrors++;
4066 continue;
4067 }
4068 fIgnoreFlags |= X86_PTE_RW;
4069 }
4070 }
4071 else
4072 {
4073 if (PteDst.n.u1Present)
4074 {
4075 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4076 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4077 cErrors++;
4078 continue;
4079 }
4080 fIgnoreFlags |= X86_PTE_P;
4081 }
4082 }
4083
4084 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4085 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4086 )
4087 {
4088 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4089 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4090 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4091 cErrors++;
4092 continue;
4093 }
4094 } /* for each PTE */
4095 }
4096 }
4097 /* not present */
4098
4099 } /* for each PDE */
4100
4101 } /* for each PDPTE */
4102
4103 } /* for each PML4E */
4104
4105# ifdef DEBUG
4106 if (cErrors)
4107 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4108# endif
4109
4110#endif /* GST == 32BIT, PAE or AMD64 */
4111 return cErrors;
4112
4113#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4114}
4115#endif /* VBOX_STRICT */
4116
4117
4118/**
4119 * Sets up the CR3 for shadow paging
4120 *
4121 * @returns Strict VBox status code.
4122 * @retval VINF_SUCCESS.
4123 *
4124 * @param pVM VM handle.
4125 * @param GCPhysCR3 The physical address in the CR3 register.
4126 */
4127PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3)
4128{
4129 /* Update guest paging info. */
4130#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4131 || PGM_GST_TYPE == PGM_TYPE_PAE \
4132 || PGM_GST_TYPE == PGM_TYPE_AMD64
4133
4134 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4135
4136 /*
4137 * Map the page CR3 points at.
4138 */
4139 RTHCPTR HCPtrGuestCR3;
4140 RTHCPHYS HCPhysGuestCR3;
4141# ifdef VBOX_WITH_NEW_PHYS_CODE
4142 pgmLock(pVM);
4143 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4144 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4145 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4146 /** @todo this needs some reworking wrt. locking. */
4147# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4148 HCPtrGuestCR3 = NIL_RTHCPTR;
4149 int rc = VINF_SUCCESS;
4150# else
4151 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4152# endif
4153 pgmUnlock(pVM);
4154# else /* !VBOX_WITH_NEW_PHYS_CODE */
4155 int rc = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhysCR3 & GST_CR3_PAGE_MASK, &HCPtrGuestCR3, &HCPhysGuestCR3);
4156# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4157 if (RT_SUCCESS(rc))
4158 {
4159 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4160 if (RT_SUCCESS(rc))
4161 {
4162# ifdef IN_RC
4163 PGM_INVL_PG(pVM->pgm.s.GCPtrCR3Mapping);
4164# endif
4165# if PGM_GST_TYPE == PGM_TYPE_32BIT
4166 pVM->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4167# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4168 pVM->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4169# endif
4170 pVM->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4171
4172# elif PGM_GST_TYPE == PGM_TYPE_PAE
4173 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4174 pVM->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4175# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4176 pVM->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4177# endif
4178 pVM->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4179 Log(("Cached mapping %RRv\n", pVM->pgm.s.pGstPaePdptRC));
4180
4181 /*
4182 * Map the 4 PDs too.
4183 */
4184 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVM->pgm.s);
4185 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4186 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4187 {
4188 if (pGuestPDPT->a[i].n.u1Present)
4189 {
4190 RTHCPTR HCPtr;
4191 RTHCPHYS HCPhys;
4192 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4193# ifdef VBOX_WITH_NEW_PHYS_CODE
4194 pgmLock(pVM);
4195 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4196 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4197 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4198# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4199 HCPtr = NIL_RTHCPTR;
4200 int rc2 = VINF_SUCCESS;
4201# else
4202 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4203# endif
4204 pgmUnlock(pVM);
4205# else /* !VBOX_WITH_NEW_PHYS_CODE */
4206 int rc2 = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
4207# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4208 if (RT_SUCCESS(rc2))
4209 {
4210 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4211 AssertRCReturn(rc, rc);
4212
4213 pVM->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4214# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4215 pVM->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4216# endif
4217 pVM->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4218 pVM->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4219 PGM_INVL_PG(GCPtr); /** @todo This ends up calling HWACCMInvalidatePage, is that correct? */
4220 continue;
4221 }
4222 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4223 }
4224
4225 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4226# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4227 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4228# endif
4229 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4230 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4231 PGM_INVL_PG(GCPtr); /** @todo this shouldn't be necessary? */
4232 }
4233
4234# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4235 pVM->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4236# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4237 pVM->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4238# endif
4239# endif
4240 }
4241 else
4242 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4243 }
4244 else
4245 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4246
4247#else /* prot/real stub */
4248 int rc = VINF_SUCCESS;
4249#endif
4250
4251 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4252# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4253 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4254 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4255 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4256 && PGM_GST_TYPE != PGM_TYPE_PROT))
4257
4258 Assert(!HWACCMIsNestedPagingActive(pVM));
4259
4260 /*
4261 * Update the shadow root page as well since that's not fixed.
4262 */
4263 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4264 PPGMPOOLPAGE pOldShwPageCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
4265 uint32_t iOldShwUserTable = pVM->pgm.s.iShwUserTable;
4266 uint32_t iOldShwUser = pVM->pgm.s.iShwUser;
4267 PPGMPOOLPAGE pNewShwPageCR3;
4268
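 /* The guest CR3 page frame index is used as the 32-bit pool user table index below, so it must fit in 32 bits. */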
4269 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4270 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4271 AssertFatalRC(rc);
4272 rc = VINF_SUCCESS;
4273
4274 /* Mark the page as locked; disallow flushing. */
4275 pgmPoolLockPage(pPool, pNewShwPageCR3);
4276
4277# ifdef IN_RC
4278 /* NOTE: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4279 bool fLog = VMMGCLogDisable(pVM);
4280# endif
4281
4282 pVM->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4283 pVM->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4284 pVM->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
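 /* The assignment above only set the current context's pointer; fix up the pointers for the other two contexts as well. */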
4285# ifdef IN_RING0
4286 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4287 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4288# elif defined(IN_RC)
4289 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4290 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4291# else
4292 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4293 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4294# endif
4295
4296# ifndef PGM_WITHOUT_MAPPINGS
4297 /*
4298 * Apply all hypervisor mappings to the new CR3.
4299 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4300 * make sure we check for conflicts in the new CR3 root.
4301 */
4302# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4303 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL) || VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4304# endif
4305 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4306 AssertRCReturn(rc, rc);
4307# endif
4308
4309 /* Set the current hypervisor CR3. */
4310 CPUMSetHyperCR3(pVM, PGMGetHyperCR3(pVM));
4311 SELMShadowCR3Changed(pVM);
4312
4313# ifdef IN_RC
4314 VMMGCLogRestore(pVM, fLog);
4315# endif
4316
4317 /* Clean up the old CR3 root. */
4318 if (pOldShwPageCR3)
4319 {
4320 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4321# ifndef PGM_WITHOUT_MAPPINGS
4322 /* Remove the hypervisor mappings from the shadow page table. */
4323 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4324# endif
4325 /* Mark the page as unlocked; allow flushing again. */
4326 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4327
4328 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4329 }
4330
4331# endif
4332
4333 return rc;
4334}
4335
4336/**
4337 * Unmaps the shadow CR3.
4338 *
4339 * @returns VBox status, no specials.
4340 * @param pVM VM handle.
4341 */
4342PGM_BTH_DECL(int, UnmapCR3)(PVM pVM)
4343{
4344 LogFlow(("UnmapCR3\n"));
4345
4346 int rc = VINF_SUCCESS;
4347
4348 /* Update guest paging info. */
4349#if PGM_GST_TYPE == PGM_TYPE_32BIT
4350 pVM->pgm.s.pGst32BitPdR3 = 0;
4351#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4352 pVM->pgm.s.pGst32BitPdR0 = 0;
4353#endif
4354 pVM->pgm.s.pGst32BitPdRC = 0;
4355
4356#elif PGM_GST_TYPE == PGM_TYPE_PAE
4357 pVM->pgm.s.pGstPaePdptR3 = 0;
4358# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4359 pVM->pgm.s.pGstPaePdptR0 = 0;
4360# endif
4361 pVM->pgm.s.pGstPaePdptRC = 0;
4362 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4363 {
4364 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4365# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4366 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4367# endif
4368 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4369 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4370 }
4371
4372#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4373 pVM->pgm.s.pGstAmd64Pml4R3 = 0;
4374# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4375 pVM->pgm.s.pGstAmd64Pml4R0 = 0;
4376# endif
4377
4378#else /* prot/real mode stub */
4379 /* nothing to do */
4380#endif
4381
4382#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4383 /* Update shadow paging info. */
4384# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4385 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4386 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4387
4388# if PGM_GST_TYPE != PGM_TYPE_REAL
4389 Assert(!HWACCMIsNestedPagingActive(pVM));
4390# endif
4391
4392# ifndef PGM_WITHOUT_MAPPINGS
4393 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4394 /* Remove the hypervisor mappings from the shadow page table. */
4395 pgmMapDeactivateCR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4396# endif
4397
4398 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4399 {
4400 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4401
4402 Assert(pVM->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4403
4404 /* Mark the page as unlocked; allow flushing again. */
4405 pgmPoolUnlockPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4406
4407 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), pVM->pgm.s.iShwUser, pVM->pgm.s.iShwUserTable);
4408 pVM->pgm.s.pShwPageCR3R3 = 0;
4409 pVM->pgm.s.pShwPageCR3R0 = 0;
4410 pVM->pgm.s.pShwPageCR3RC = 0;
4411 pVM->pgm.s.iShwUser = 0;
4412 pVM->pgm.s.iShwUserTable = 0;
4413 }
4414# endif
4415#endif /* !IN_RC*/
4416
4417 return rc;
4418}
4419