VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@16859

Last change on this file: r16859, checked in by vboxsync on 2009-02-17

Load hypervisor CR3 from CPUM (instead of hardcoded fixups in the switchers). Dangerous change. Watch for regressions.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 207.0 KB
1/* $Id: PGMAllBth.h 16859 2009-02-17 16:19:51Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVM pVM);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
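/*
 * Note on the template mechanism: this header is compiled once per
 * guest/shadow paging mode combination, with PGM_GST_TYPE and PGM_SHW_TYPE
 * defined by the including code and PGM_BTH_DECL()/PGM_BTH_NAME() expanding
 * to mode-specific symbol names.  As an illustrative sketch (the exact
 * expansion is defined elsewhere), a 32-bit guest on a 32-bit shadow build
 * roughly yields:
 *
 *   PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, ...)
 *       -> int pgmBth32Bit32BitTrap0eHandler(PVM pVM, ...)
 *
 * The #error checks above prune the guest/shadow combinations that are
 * never instantiated.
 */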
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 * @param pVM VM Handle.
78 * @param uErr The trap error code.
79 * @param pRegFrame Trap register frame.
80 * @param pvFault The fault address.
81 */
82PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
83{
84# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
85 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
86 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
87
88# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
89 /*
90 * Hide the instruction fetch trap indicator for now.
91 */
92 /** @todo NXE will change this and we must fix NXE in the switcher too! */
93 if (uErr & X86_TRAP_PF_ID)
94 {
95 uErr &= ~X86_TRAP_PF_ID;
96 TRPMSetErrorCode(pVM, uErr);
97 }
98# endif
99
100 /*
101 * Get PDs.
102 */
103 int rc;
104# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
105# if PGM_GST_TYPE == PGM_TYPE_32BIT
106 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
107 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
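    /* For 32-bit guest paging the linear address splits 10+10+12: bits 31:22
       select the guest PDE (GST_PD_SHIFT is 22 in this mode), bits 21:12 the
       PTE and bits 11:0 the byte offset within the 4KB page. */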
108
109# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
110
111# if PGM_GST_TYPE == PGM_TYPE_PAE
112 unsigned iPDSrc;
113# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
114 X86PDPE PdpeSrc;
115 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
116# else
117 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, NULL);
118# endif
119
120# elif PGM_GST_TYPE == PGM_TYPE_AMD64
121 unsigned iPDSrc;
122 PX86PML4E pPml4eSrc;
123 X86PDPE PdpeSrc;
124 PGSTPD pPDSrc;
125
126 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
127 Assert(pPml4eSrc);
128# endif
129
130 /* Quick check for a valid guest trap. (PAE & AMD64) */
131 if (!pPDSrc)
132 {
133# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
134 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
135# else
136 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
137# endif
138 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
139 TRPMSetErrorCode(pVM, uErr);
140 return VINF_EM_RAW_GUEST_TRAP;
141 }
142# endif
143
144# else /* !PGM_WITH_PAGING */
145 PGSTPD pPDSrc = NULL;
146 const unsigned iPDSrc = 0;
147# endif /* !PGM_WITH_PAGING */
148
149
150# if PGM_SHW_TYPE == PGM_TYPE_32BIT
151 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
152 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
153
154# elif PGM_SHW_TYPE == PGM_TYPE_PAE
155 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
156
157# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
158 PX86PDPAE pPDDst;
159# if PGM_GST_TYPE != PGM_TYPE_PAE
160 X86PDPE PdpeSrc;
161
162 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
163 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
164# endif
165 rc = pgmShwSyncPaePDPtr(pVM, pvFault, &PdpeSrc, &pPDDst);
166 if (rc != VINF_SUCCESS)
167 {
168 AssertRC(rc);
169 return rc;
170 }
171 Assert(pPDDst);
172
173# else
174 PX86PDPAE pPDDst = pgmShwGetPaePDPtr(&pVM->pgm.s, pvFault);
175
176 /* Did we mark the PDPT as not present in SyncCR3? */
177 unsigned iPdpt = (pvFault >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
178 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
179 if (!pPdptDst->a[iPdpt].n.u1Present)
180 pPdptDst->a[iPdpt].n.u1Present = 1;
181# endif
182
183# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
184 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
185 PX86PDPAE pPDDst;
186# if PGM_GST_TYPE == PGM_TYPE_PROT
187 /* AMD-V nested paging */
188 X86PML4E Pml4eSrc;
189 X86PDPE PdpeSrc;
190 PX86PML4E pPml4eSrc = &Pml4eSrc;
191
192 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
193 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
194 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
195# endif
196
197 rc = pgmShwSyncLongModePDPtr(pVM, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
198 if (rc != VINF_SUCCESS)
199 {
200 AssertRC(rc);
201 return rc;
202 }
203 Assert(pPDDst);
204
205# elif PGM_SHW_TYPE == PGM_TYPE_EPT
206 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
207 PEPTPD pPDDst;
208
209 rc = pgmShwGetEPTPDPtr(pVM, pvFault, NULL, &pPDDst);
210 if (rc != VINF_SUCCESS)
211 {
212 AssertRC(rc);
213 return rc;
214 }
215 Assert(pPDDst);
216# endif
217
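    /*
     * Background: writable-but-not-dirty guest pages are initially mapped
     * read-only in the shadow tables and tagged PGM_PTFLAGS_TRACK_DIRTY (see
     * SyncPageWorker further down), so a write fault here may merely be the
     * dirty/accessed bookkeeping catching up.  CheckPageFault separates that
     * case (VINF_PGM_HANDLED_DIRTY_BIT_FAULT) from a genuine guest trap.
     */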
218# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
219 /*
220 * If we successfully correct the write protection fault due to dirty bit
221 * tracking, or this page fault is a genuine one, then return immediately.
222 */
223 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
224 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
225 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
226 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
227 || rc == VINF_EM_RAW_GUEST_TRAP)
228 {
229 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
230 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
231 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
232 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
233 }
234
235 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0ePD[iPDSrc]);
236# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
237
238 /*
239 * A common case is the not-present error caused by lazy page table syncing.
240 *
241 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
242 * so we can safely assume that the shadow PT is present when calling SyncPage later.
243 *
244 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
245 * of mapping conflict and defer to SyncCR3 in R3.
246 * (Again, we do NOT support access handlers for non-present guest pages.)
247 *
248 */
249# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
250 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
251# else
252 GSTPDE PdeSrc;
253 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
254 PdeSrc.n.u1Present = 1;
255 PdeSrc.n.u1Write = 1;
256 PdeSrc.n.u1Accessed = 1;
257 PdeSrc.n.u1User = 1;
258# endif
259 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
260 && !pPDDst->a[iPDDst].n.u1Present
261 && PdeSrc.n.u1Present
262 )
263
264 {
265 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2SyncPT; });
266 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
267 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
268 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, pvFault);
269 if (RT_SUCCESS(rc))
270 {
271 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
272 return rc;
273 }
274 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
275 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
276 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
277 return VINF_PGM_SYNC_CR3;
278 }
279
280# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
281 /*
282 * Check if this address is within any of our mappings.
283 *
284 * This is *very* fast and it's gonna save us a bit of effort below and prevent
285 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
286 * (BTW, it's impossible to have physical access handlers in a mapping.)
287 */
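    /*
     * (The conflict scan below only matters while the hypervisor mappings are
     * still floating: if the guest has made a PDE present underneath one of
     * our mappings, we defer to SyncCR3 so the conflict can be resolved.)
     */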
288 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
289 {
290 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
291 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
292 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
293 {
294 if (pvFault < pMapping->GCPtr)
295 break;
296 if (pvFault - pMapping->GCPtr < pMapping->cb)
297 {
298 /*
299 * The first thing we check is if we've got an undetected conflict.
300 */
301 if (!pVM->pgm.s.fMappingsFixed)
302 {
303 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
304 while (iPT-- > 0)
305 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
306 {
307 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eConflicts);
308 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
309 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
310 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
311 return VINF_PGM_SYNC_CR3;
312 }
313 }
314
315 /*
316 * Check if the fault address is in a virtual page access handler range.
317 */
318 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
319 if ( pCur
320 && pvFault - pCur->Core.Key < pCur->cb
321 && uErr & X86_TRAP_PF_RW)
322 {
323# ifdef IN_RC
324 STAM_PROFILE_START(&pCur->Stat, h);
325 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
326 STAM_PROFILE_STOP(&pCur->Stat, h);
327# else
328 AssertFailed();
329 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
330# endif
331 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersMapping);
332 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
333 return rc;
334 }
335
336 /*
337 * Pretend we're not here and let the guest handle the trap.
338 */
339 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
340 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFMapping);
341 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
342 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
343 return VINF_EM_RAW_GUEST_TRAP;
344 }
345 }
346 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
347 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
348# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
349
350 /*
351 * Check if this fault address is flagged for special treatment,
352 * which means we'll have to figure out the physical address and
353 * check flags associated with it.
354 *
355 * ASSUME that we can limit any special access handling to pages
356 * in page tables which the guest believes to be present.
357 */
358 if (PdeSrc.n.u1Present)
359 {
360 RTGCPHYS GCPhys = NIL_RTGCPHYS;
361
362# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
363# if PGM_GST_TYPE == PGM_TYPE_AMD64
364 bool fBigPagesSupported = true;
365# else
366 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
367# endif
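    /* Resolve the guest physical address of the faulting page: for a big
       (2/4MB) page it is the PDE base plus the 4KB-aligned offset within the
       big page, which is what (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK)
       selects; otherwise walk the guest page table below. */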
368 if ( PdeSrc.b.u1Size
369 && fBigPagesSupported)
370 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
371 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
372 else
373 {
374 PGSTPT pPTSrc;
375 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
376 if (RT_SUCCESS(rc))
377 {
378 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
379 if (pPTSrc->a[iPTESrc].n.u1Present)
380 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
381 }
382 }
383# else
384 /* No paging so the fault address is the physical address */
385 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
386# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
387
388 /*
389 * If we have a GC address we'll check if it has any flags set.
390 */
391 if (GCPhys != NIL_RTGCPHYS)
392 {
393 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
394
395 PPGMPAGE pPage;
396 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
397 if (RT_SUCCESS(rc))
398 {
399 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
400 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
401 {
402 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
403 {
404 /*
405 * Physical page access handler.
406 */
407 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
408 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
409 if (pCur)
410 {
411# ifdef PGM_SYNC_N_PAGES
412 /*
413 * If the region is write protected and we got a page not present fault, then sync
414 * the pages. If the fault was caused by a read, then restart the instruction.
415 * In case of write access continue to the GC write handler.
416 *
417 * ASSUMES that there is only one handler per page or that they have similar write properties.
418 */
419 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
420 && !(uErr & X86_TRAP_PF_P))
421 {
422 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
423 if ( RT_FAILURE(rc)
424 || !(uErr & X86_TRAP_PF_RW)
425 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
426 {
427 AssertRC(rc);
428 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
429 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
430 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
431 return rc;
432 }
433 }
434# endif
435
436 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
437 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
438 ("Unexpected trap for physical handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
439
440# if defined(IN_RC) || defined(IN_RING0)
441 if (pCur->CTX_SUFF(pfnHandler))
442 {
443 STAM_PROFILE_START(&pCur->Stat, h);
444 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
445 STAM_PROFILE_STOP(&pCur->Stat, h);
446 }
447 else
448# endif
449 rc = VINF_EM_RAW_EMULATE_INSTR;
450 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersPhysical);
451 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
452 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndPhys; });
453 return rc;
454 }
455 }
456# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
457 else
458 {
459# ifdef PGM_SYNC_N_PAGES
460 /*
461 * If the region is write protected and we got a page not present fault, then sync
462 * the pages. If the fault was caused by a read, then restart the instruction.
463 * In case of write access continue to the GC write handler.
464 */
465 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
466 && !(uErr & X86_TRAP_PF_P))
467 {
468 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
469 if ( RT_FAILURE(rc)
470 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
471 || !(uErr & X86_TRAP_PF_RW))
472 {
473 AssertRC(rc);
474 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
475 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
476 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
477 return rc;
478 }
479 }
480# endif
481 /*
482 * Ok, it's a virtual page access handler.
483 *
484 * Since it's faster to search by address, we'll do that first
485 * and then retry by GCPhys if that fails.
486 */
487 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
488 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
489 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
490 */
491 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
492 if (pCur)
493 {
494 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
495 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
496 || !(uErr & X86_TRAP_PF_P)
497 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
498 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) HCPhys=%HGp uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
499
500 if ( pvFault - pCur->Core.Key < pCur->cb
501 && ( uErr & X86_TRAP_PF_RW
502 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
503 {
504# ifdef IN_RC
505 STAM_PROFILE_START(&pCur->Stat, h);
506 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
507 STAM_PROFILE_STOP(&pCur->Stat, h);
508# else
509 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
510# endif
511 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtual);
512 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
513 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
514 return rc;
515 }
516 /* Unhandled part of a monitored page */
517 }
518 else
519 {
520 /* Check by physical address. */
521 PPGMVIRTHANDLER pCur;
522 unsigned iPage;
523 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
524 &pCur, &iPage);
525 Assert(RT_SUCCESS(rc) || !pCur);
526 if ( pCur
527 && ( uErr & X86_TRAP_PF_RW
528 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
529 {
530 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
531# ifdef IN_RC
532 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
533 Assert(off < pCur->cb);
534 STAM_PROFILE_START(&pCur->Stat, h);
535 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
536 STAM_PROFILE_STOP(&pCur->Stat, h);
537# else
538 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
539# endif
540 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
541 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
542 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
543 return rc;
544 }
545 }
546 }
547# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
548
549 /*
550 * There is a handled area of the page, but this fault doesn't belong to it.
551 * We must emulate the instruction.
552 *
553 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
554 * we first check if this was a page-not-present fault for a page with only
555 * write access handlers. Restart the instruction if it wasn't a write access.
556 */
557 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersUnhandled);
558
559 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
560 && !(uErr & X86_TRAP_PF_P))
561 {
562 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
563 if ( RT_FAILURE(rc)
564 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
565 || !(uErr & X86_TRAP_PF_RW))
566 {
567 AssertRC(rc);
568 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
569 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
570 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
571 return rc;
572 }
573 }
574
575 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
576 * It's writing to an unhandled part of the LDT page several million times.
577 */
578 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
579 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d HCPhys=%RHp%s%s\n",
580 rc, pPage->HCPhys,
581 PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage) ? " phys" : "",
582 PGM_PAGE_HAS_ANY_VIRTUAL_HANDLERS(pPage) ? " virt" : ""));
583 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
584 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndUnhandled; });
585 return rc;
586 } /* if any kind of handler */
587
588# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
589 if (uErr & X86_TRAP_PF_P)
590 {
591 /*
592 * The page isn't marked, but it might still be monitored by a virtual page access handler.
593 * (ASSUMES no temporary disabling of virtual handlers.)
594 */
595 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
596 * we should correct both the shadow page table and physical memory flags, and not only check for
597 * accesses within the handler region but for access to pages with virtual handlers. */
598 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
599 if (pCur)
600 {
601 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
602 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
603 || !(uErr & X86_TRAP_PF_P)
604 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
605 ("Unexpected trap for virtual handler: %08X (phys=%08x) HCPhys=%X uErr=%X, enum=%d\n", pvFault, GCPhys, pPage->HCPhys, uErr, pCur->enmType));
606
607 if ( pvFault - pCur->Core.Key < pCur->cb
608 && ( uErr & X86_TRAP_PF_RW
609 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
610 {
611# ifdef IN_RC
612 STAM_PROFILE_START(&pCur->Stat, h);
613 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
614 STAM_PROFILE_STOP(&pCur->Stat, h);
615# else
616 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
617# endif
618 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
619 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
620 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
621 return rc;
622 }
623 }
624 }
625# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
626 }
627 else
628 {
629 /* When the guest accesses invalid physical memory (e.g. probing of RAM or accessing a remapped MMIO range), we fall
630 * back to the recompiler to emulate the instruction.
631 */
632 LogFlow(("pgmPhysGetPageEx %RGp failed with %Rrc\n", GCPhys, rc));
633 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersInvalid);
634 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
635 return VINF_EM_RAW_EMULATE_INSTR;
636 }
637
638 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
639
640# ifdef PGM_OUT_OF_SYNC_IN_GC
641 /*
642 * We only get here if the page is present in the guest page tables and the trap
643 * was not handled by our handlers.
644 * Check for a page out-of-sync situation.
645 */
646 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
647
648 if (!(uErr & X86_TRAP_PF_P))
649 {
650 /*
651 * Page is not present in our page tables.
652 * Try to sync it!
653 * BTW, fPageShw is invalid in this branch!
654 */
655 if (uErr & X86_TRAP_PF_US)
656 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
657 else /* supervisor */
658 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
659
660# if defined(LOG_ENABLED) && !defined(IN_RING0)
661 RTGCPHYS GCPhys;
662 uint64_t fPageGst;
663 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
664 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
665 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
666# endif /* LOG_ENABLED */
667
668# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
669 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
670 {
671 uint64_t fPageGst;
672 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
673 if ( RT_SUCCESS(rc)
674 && !(fPageGst & X86_PTE_US))
675 {
676 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
677 if ( pvFault == (RTGCPTR)pRegFrame->eip
678 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
679# ifdef CSAM_DETECT_NEW_CODE_PAGES
680 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
681 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
682# endif /* CSAM_DETECT_NEW_CODE_PAGES */
683 )
684 {
685 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
686 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
687 if (rc != VINF_SUCCESS)
688 {
689 /*
690 * CSAM needs to perform a job in ring 3.
691 *
692 * Sync the page before going to the host context; otherwise we'll end up in a loop if
693 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
694 */
695 LogFlow(("CSAM ring 3 job\n"));
696 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
697 AssertRC(rc2);
698
699 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
700 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2CSAM; });
701 return rc;
702 }
703 }
704# ifdef CSAM_DETECT_NEW_CODE_PAGES
705 else if ( uErr == X86_TRAP_PF_RW
706 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
707 && pRegFrame->ecx < 0x10000)
708 {
709 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
710 * to detect loading of new code pages.
711 */
712
713 /*
714 * Decode the instruction.
715 */
716 RTGCPTR PC;
717 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
718 if (rc == VINF_SUCCESS)
719 {
720 DISCPUSTATE Cpu;
721 uint32_t cbOp;
722 rc = EMInterpretDisasOneEx(pVM, PC, pRegFrame, &Cpu, &cbOp);
723
724 /* For now we'll restrict this to rep movsw/d instructions */
725 if ( rc == VINF_SUCCESS
726 && Cpu.pCurInstr->opcode == OP_MOVSWD
727 && (Cpu.prefix & PREFIX_REP))
728 {
729 CSAMMarkPossibleCodePage(pVM, pvFault);
730 }
731 }
732 }
733# endif /* CSAM_DETECT_NEW_CODE_PAGES */
734
735 /*
736 * Mark this page as safe.
737 */
738 /** @todo not correct for pages that contain both code and data!! */
739 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
740 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
741 }
742 }
743# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
744 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
745 if (RT_SUCCESS(rc))
746 {
747 /* The page was successfully synced, return to the guest. */
748 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
749 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSync; });
750 return VINF_SUCCESS;
751 }
752 }
753 else
754 {
755 /*
756 * A side effect of not flushing global PDEs is out-of-sync pages due
757 * to physically monitored regions that are no longer valid.
758 * Assume for now that this only applies to the read/write flag.
759 */
760 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
761 {
762 if (uErr & X86_TRAP_PF_US)
763 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
764 else /* supervisor */
765 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
766
767
768 /*
769 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the page is not present, which is not true in this case.
770 */
771 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
772 if (RT_SUCCESS(rc))
773 {
774 /*
775 * Page was successfully synced, return to guest.
776 */
777# ifdef VBOX_STRICT
778 RTGCPHYS GCPhys;
779 uint64_t fPageGst;
780 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
781 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
782 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
783
784 uint64_t fPageShw;
785 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
786 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
787# endif /* VBOX_STRICT */
788 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
789 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
790 return VINF_SUCCESS;
791 }
792
793 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
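    /* (If the guest runs with CR0.WP clear while paging is on and this is a
       ring-0 write fault on a page the guest maps read-only, the write is
       architecturally allowed, so the instruction is interpreted here rather
       than reflected to the guest.) */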
794 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
795 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
796 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
797 {
798 uint64_t fPageGst;
799 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
800 if ( RT_SUCCESS(rc)
801 && !(fPageGst & X86_PTE_RW))
802 {
803 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
804 if (RT_SUCCESS(rc))
805 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulInRZ);
806 else
807 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulToR3);
808 return rc;
809 }
810 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
811 }
812 }
813
814# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
815# ifdef VBOX_STRICT
816 /*
817 * Check for VMM page flags vs. Guest page flags consistency.
818 * Currently only for debug purposes.
819 */
820 if (RT_SUCCESS(rc))
821 {
822 /* Get guest page flags. */
823 uint64_t fPageGst;
824 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
825 if (RT_SUCCESS(rc))
826 {
827 uint64_t fPageShw;
828 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
829
830 /*
831 * Compare page flags.
832 * Note: we have AVL, A, D bits desynched.
833 */
834 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
835 ("Page flags mismatch! pvFault=%RGv GCPhys=%RGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
836 }
837 else
838 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
839 }
840 else
841 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
842# endif /* VBOX_STRICT */
843# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
844 }
845 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
846# endif /* PGM_OUT_OF_SYNC_IN_GC */
847 }
848 else
849 {
850 /*
851 * Page not present in Guest OS or invalid page table address.
852 * This is potential virtual page access handler food.
853 *
854 * For the present we'll say that our access handlers don't
855 * work for this case - we've already discarded the page table
856 * not present case which is identical to this.
857 *
858 * When we perchance find we need this, we will probably have AVL
859 * trees (offset based) to operate on and we can measure their speed
860 * against mapping a page table and probably rearrange this handling
861 * a bit. (Like, searching virtual ranges before checking the
862 * physical address.)
863 */
864 }
865 }
866
867
868# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
869 /*
870 * Conclusion, this is a guest trap.
871 */
872 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
873 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFUnh);
874 return VINF_EM_RAW_GUEST_TRAP;
875# else
876 /* present, but not a monitored page; perhaps the guest is probing physical memory */
877 return VINF_EM_RAW_EMULATE_INSTR;
878# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
879
880
881# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
882
883 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
884 return VERR_INTERNAL_ERROR;
885# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
886}
887#endif /* !IN_RING3 */
888
889
890/**
891 * Emulation of the invlpg instruction.
892 *
893 *
894 * @returns VBox status code.
895 *
896 * @param pVM VM handle.
897 * @param GCPtrPage Page to invalidate.
898 *
899 * @remark ASSUMES that the guest is updating before invalidating. This order
900 * isn't required by the CPU, so this is speculative and could cause
901 * trouble.
902 *
903 * @todo Flush page or page directory only if necessary!
904 * @todo Add a #define for simply invalidating the page.
905 */
906PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage)
907{
908#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
909 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
910 && PGM_SHW_TYPE != PGM_TYPE_EPT
911 int rc;
912
913 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
914 /*
915 * Get the shadow PD entry and skip out if this PD isn't present.
916 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
917 */
918# if PGM_SHW_TYPE == PGM_TYPE_32BIT
919 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
920 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
921
922# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
923 /* Fetch the pgm pool shadow descriptor. */
924 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
925 Assert(pShwPde);
926# endif
927
928# elif PGM_SHW_TYPE == PGM_TYPE_PAE
929 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
930 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
931
932 /* If the shadow PDPE isn't present, then skip the invalidate. */
933 if (!pPdptDst->a[iPdpt].n.u1Present)
934 {
935 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
936 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
937 return VINF_SUCCESS;
938 }
939
940# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
941 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
942 PPGMPOOLPAGE pShwPde;
943 PX86PDPAE pPDDst;
944
945 /* Fetch the pgm pool shadow descriptor. */
946 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
947 AssertRCSuccessReturn(rc, rc);
948 Assert(pShwPde);
949
950 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
951 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
952# else
953 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - pool index only atm! */;
954 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
955# endif
956
957# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
958 /* PML4 */
959 AssertReturn(pVM->pgm.s.pShwRootR3, VERR_INTERNAL_ERROR);
960
961 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
962 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
963 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
964 PX86PDPAE pPDDst;
965 PX86PDPT pPdptDst;
966 PX86PML4E pPml4eDst;
967 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
968 if (rc != VINF_SUCCESS)
969 {
970 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
971 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
972 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
973 PGM_INVL_GUEST_TLBS();
974 return VINF_SUCCESS;
975 }
976 Assert(pPDDst);
977
978 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
979 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
980
981 if (!pPdpeDst->n.u1Present)
982 {
983 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
984 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
985 PGM_INVL_GUEST_TLBS();
986 return VINF_SUCCESS;
987 }
988
989# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
990
991 const SHWPDE PdeDst = *pPdeDst;
992 if (!PdeDst.n.u1Present)
993 {
994 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
995 return VINF_SUCCESS;
996 }
997
998 /*
999 * Get the guest PD entry and calc big page.
1000 */
1001# if PGM_GST_TYPE == PGM_TYPE_32BIT
1002 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
1003 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1004 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1005# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1006 unsigned iPDSrc;
1007# if PGM_GST_TYPE == PGM_TYPE_PAE
1008 X86PDPE PdpeSrc;
1009 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1010# else /* AMD64 */
1011 PX86PML4E pPml4eSrc;
1012 X86PDPE PdpeSrc;
1013 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1014# endif
1015 GSTPDE PdeSrc;
1016
1017 if (pPDSrc)
1018 PdeSrc = pPDSrc->a[iPDSrc];
1019 else
1020 PdeSrc.u = 0;
1021# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1022
1023# if PGM_GST_TYPE == PGM_TYPE_AMD64
1024 const bool fIsBigPage = PdeSrc.b.u1Size;
1025# else
1026 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1027# endif
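 /* Long mode always supports large pages, so the AMD64 guest case skips the
    CR4.PSE test; the other guest modes only treat the PDE as a big page
    when CR4.PSE is set as well. */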
1028
1029# ifdef IN_RING3
1030 /*
1031 * If a CR3 Sync is pending we may ignore the invalidate page operation
1032 * depending on the kind of sync and if it's a global page or not.
1033 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1034 */
1035# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1036 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
1037 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
1038 && fIsBigPage
1039 && PdeSrc.b.u1Global
1040 )
1041 )
1042# else
1043 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1044# endif
1045 {
1046 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1047 return VINF_SUCCESS;
1048 }
1049# endif /* IN_RING3 */
1050
1051# if PGM_GST_TYPE == PGM_TYPE_AMD64
1052 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1053
1054 /* Fetch the pgm pool shadow descriptor. */
1055 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
1056 Assert(pShwPdpt);
1057
1058 /* Fetch the pgm pool shadow descriptor. */
1059 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1060 Assert(pShwPde);
1061
1062 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1063 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1064
1065 if ( !pPml4eSrc->n.u1Present
1066 || pShwPdpt->GCPhys != GCPhysPdpt)
1067 {
1068 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1069 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1070 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1071 pPml4eDst->u = 0;
1072 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1073 PGM_INVL_GUEST_TLBS();
1074 return VINF_SUCCESS;
1075 }
1076 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1077 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1078 {
1079 /*
1080 * Mark not present so we can resync the PML4E when it's used.
1081 */
1082 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1083 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1084 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1085 pPml4eDst->u = 0;
1086 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1087 PGM_INVL_GUEST_TLBS();
1088 }
1089 else if (!pPml4eSrc->n.u1Accessed)
1090 {
1091 /*
1092 * Mark not present so we can set the accessed bit.
1093 */
1094 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1095 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1096 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1097 pPml4eDst->u = 0;
1098 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1099 PGM_INVL_GUEST_TLBS();
1100 }
1101
1102 /* Check if the PDPT entry has changed. */
1103 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1104 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1105 if ( !PdpeSrc.n.u1Present
1106 || pShwPde->GCPhys != GCPhysPd)
1107 {
1108 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1109 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1110 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1111 pPdpeDst->u = 0;
1112 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1113 PGM_INVL_GUEST_TLBS();
1114 return VINF_SUCCESS;
1115 }
1116 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1117 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1118 {
1119 /*
1120 * Mark not present so we can resync the PDPTE when it's used.
1121 */
1122 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1123 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1124 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1125 pPdpeDst->u = 0;
1126 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1127 PGM_INVL_GUEST_TLBS();
1128 }
1129 else if (!PdpeSrc.lm.u1Accessed)
1130 {
1131 /*
1132 * Mark not present so we can set the accessed bit.
1133 */
1134 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1135 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1136 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1137 pPdpeDst->u = 0;
1138 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1139 PGM_INVL_GUEST_TLBS();
1140 }
1141# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1142
1143# if PGM_GST_TYPE == PGM_TYPE_PAE && !defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1144 /*
1145 * Update the shadow PDPE and free all the shadow PD entries if the PDPE is marked not present.
1146 * Note: This shouldn't actually be necessary as we monitor the PDPT page for changes.
1147 */
1148 if (!pPDSrc)
1149 {
1150 /* Guest PDPE not present */
1151 PX86PDPAE pPDDst = pgmShwGetPaePDPtr(&pVM->pgm.s, GCPtrPage);
1152 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1153
1154 Assert(!PdpeSrc.n.u1Present);
1155 LogFlow(("InvalidatePage: guest PDPE %d not present; clear shw pdpe\n", iPdpt));
1156
1157 /* for each page directory entry */
1158 for (unsigned iPD = 0; iPD < X86_PG_PAE_ENTRIES; iPD++)
1159 {
1160 if ( pPDDst->a[iPD].n.u1Present
1161 && !(pPDDst->a[iPD].u & PGM_PDFLAGS_MAPPING))
1162 {
1163 pgmPoolFree(pVM, pPDDst->a[iPD].u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdpt * X86_PG_PAE_ENTRIES + iPD);
1164 pPDDst->a[iPD].u = 0;
1165 }
1166 }
1167 if (!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING))
1168 pPdptDst->a[iPdpt].n.u1Present = 0;
1169 PGM_INVL_GUEST_TLBS();
1170 }
1171 AssertMsg(pVM->pgm.s.fMappingsFixed || (PdpeSrc.u & X86_PDPE_PG_MASK) == pVM->pgm.s.aGCPhysGstPaePDsMonitored[iPdpt], ("%RGp vs %RGp (mon)\n", (PdpeSrc.u & X86_PDPE_PG_MASK), pVM->pgm.s.aGCPhysGstPaePDsMonitored[iPdpt]));
1172# endif
1173
1174
1175 /*
1176 * Deal with the Guest PDE.
1177 */
1178 rc = VINF_SUCCESS;
1179 if (PdeSrc.n.u1Present)
1180 {
1181 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1182 {
1183 /*
1184 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1185 */
1186 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1187 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1188 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1189 }
1190 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1191 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1192 {
1193 /*
1194 * Mark not present so we can resync the PDE when it's used.
1195 */
1196 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1197 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1198# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1199 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1200# else
1201 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1202# endif
1203 pPdeDst->u = 0;
1204 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1205 PGM_INVL_GUEST_TLBS();
1206 }
1207 else if (!PdeSrc.n.u1Accessed)
1208 {
1209 /*
1210 * Mark not present so we can set the accessed bit.
1211 */
1212 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1213 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1214# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1215 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1216# else
1217 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1218# endif
1219 pPdeDst->u = 0;
1220 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1221 PGM_INVL_GUEST_TLBS();
1222 }
1223 else if (!fIsBigPage)
1224 {
1225 /*
1226 * 4KB - page.
1227 */
1228 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1229 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1230# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1231 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1232 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1233# endif
1234 if (pShwPage->GCPhys == GCPhys)
1235 {
1236# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1237 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1238 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1239 if (pPT->a[iPTEDst].n.u1Present)
1240 {
1241# ifdef PGMPOOL_WITH_USER_TRACKING
1242 /* This is very unlikely with caching/monitoring enabled. */
1243 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1244# endif
1245 pPT->a[iPTEDst].u = 0;
1246 }
1247# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1248 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1249 if (RT_SUCCESS(rc))
1250 rc = VINF_SUCCESS;
1251# endif
1252 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1253 PGM_INVL_PG(GCPtrPage);
1254 }
1255 else
1256 {
1257 /*
1258 * The page table address changed.
1259 */
1260 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1261 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1262# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1263 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1264# else
1265 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1266# endif
1267 pPdeDst->u = 0;
1268 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1269 PGM_INVL_GUEST_TLBS();
1270 }
1271 }
1272 else
1273 {
1274 /*
1275 * 2/4MB - page.
1276 */
1277 /* Before freeing the page, check if anything really changed. */
1278 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1279 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1280# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1281 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1282 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1283# endif
1284 if ( pShwPage->GCPhys == GCPhys
1285 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1286 {
1287 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1288 /** @todo PAT */
1289 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1290 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1291 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1292 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1293 {
1294 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1295 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1296 return VINF_SUCCESS;
1297 }
1298 }
1299
1300 /*
1301 * Ok, the page table is present and it's been changed in the guest.
1302 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1303 * We could do this for some flushes in GC too, but we need an algorithm for
1304 * deciding which 4MB pages contain code that is likely to be executed very soon.
1305 */
1306 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1307 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1308# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1309 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1310# else
1311 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1312# endif
1313 pPdeDst->u = 0;
1314 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1315 PGM_INVL_BIG_PG(GCPtrPage);
1316 }
1317 }
1318 else
1319 {
1320 /*
1321 * Page directory is not present, mark shadow PDE not present.
1322 */
1323 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1324 {
1325# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1326 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1327# else
1328 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPDDst);
1329# endif
1330 pPdeDst->u = 0;
1331 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1332 PGM_INVL_PG(GCPtrPage);
1333 }
1334 else
1335 {
1336 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1337 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1338 }
1339 }
1340
1341 return rc;
1342
1343#else /* guest real and protected mode */
1344 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1345 return VINF_SUCCESS;
1346#endif
1347}
1348
1349
1350#ifdef PGMPOOL_WITH_USER_TRACKING
1351/**
1352 * Update the tracking of shadowed pages.
1353 *
1354 * @param pVM The VM handle.
1355 * @param pShwPage The shadow page.
1356 * @param HCPhys The physical page which is being dereferenced.
1357 */
1358DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1359{
1360# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1361 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1362 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1363
1364 /** @todo If this turns out to be a bottleneck (*very* likely), two things can be done:
1365 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1366 * 2. write protect all shadowed pages. I.e. implement caching.
1367 */
1368 /*
1369 * Find the guest address.
1370 */
1371 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1372 pRam;
1373 pRam = pRam->CTX_SUFF(pNext))
1374 {
1375 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1376 while (iPage-- > 0)
1377 {
1378 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1379 {
1380 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1381 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1382 pShwPage->cPresent--;
1383 pPool->cPresent--;
1384 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1385 return;
1386 }
1387 }
1388 }
1389
1390 for (;;)
1391 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1392# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1393 pShwPage->cPresent--;
1394 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1395# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1396}
1397
1398
1399/**
1400 * Update the tracking of shadowed pages.
1401 *
1402 * @param pVM The VM handle.
1403 * @param pShwPage The shadow page.
1404 * @param u16 The top 16 bits of pPage->HCPhys.
1405 * @param pPage Pointer to the guest page. This will be modified.
1406 * @param iPTDst The index into the shadow table.
1407 */
1408DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1409{
1410# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1411 /*
1412 * We're making certain assumptions about the placement of cRef and idx.
1413 */
1414 Assert(MM_RAM_FLAGS_IDX_SHIFT == 48);
1415 Assert(MM_RAM_FLAGS_CREFS_SHIFT > MM_RAM_FLAGS_IDX_SHIFT);
1416
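    /*
     * Layout note (implied by the asserts above): the top 16 bits of
     * pPage->HCPhys carry the tracking state, with the shadow pool page index
     * in the low bits and the reference count (cRefs) in the bits at and above
     * (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT).  A value of zero
     * means no shadow page table references this guest page yet.
     */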
1417 /*
1418 * Just deal with the simple first time here.
1419 */
1420 if (!u16)
1421 {
1422 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1423 u16 = (1 << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) | pShwPage->idx;
1424 }
1425 else
1426 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1427
1428 /* write back, trying to be clever... */
1429 Log2(("SyncPageWorkerTrackAddRef: u16=%#x pPage->HCPhys=%RHp->%RHp iPTDst=%#x\n",
1430 u16, pPage->HCPhys, (pPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) | ((uint64_t)u16 << MM_RAM_FLAGS_CREFS_SHIFT), iPTDst));
1431 *((uint16_t *)&pPage->HCPhys + 3) = u16; /** @todo PAGE FLAGS */
1432# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1433
1434 /* update statistics. */
1435 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1436 pShwPage->cPresent++;
1437 if (pShwPage->iFirstPresent > iPTDst)
1438 pShwPage->iFirstPresent = iPTDst;
1439}
1440#endif /* PGMPOOL_WITH_USER_TRACKING */
1441
1442
1443/**
1444 * Creates a 4K shadow page for a guest page.
1445 *
1446 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1447 * physical address. Only the flags of the PdeSrc argument are used. No page structure
1448 * will be mapped by this function.
1449 *
1450 * @param pVM VM handle.
1451 * @param pPteDst Destination page table entry.
1452 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1453 * Can safely assume that only the flags are being used.
1454 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1455 * @param pShwPage Pointer to the shadow page.
1456 * @param iPTDst The index into the shadow table.
1457 *
1458 * @remark Not used for 2/4MB pages!
1459 */
1460DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1461{
1462 if (PteSrc.n.u1Present)
1463 {
1464 /*
1465 * Find the ram range.
1466 */
1467 PPGMPAGE pPage;
1468 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1469 if (RT_SUCCESS(rc))
1470 {
1471 /** @todo investigate PWT, PCD and PAT. */
1472 /*
1473 * Make page table entry.
1474 */
1475 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo FLAGS */
1476 SHWPTE PteDst;
1477 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1478 {
1479 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1480 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1481 {
1482#if PGM_SHW_TYPE == PGM_TYPE_EPT
1483 PteDst.u = (HCPhys & EPT_PTE_PG_MASK);
1484 PteDst.n.u1Present = 1;
1485 PteDst.n.u1Execute = 1;
1486 PteDst.n.u1IgnorePAT = 1;
1487 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1488 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1489#else
1490 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1491 | (HCPhys & X86_PTE_PAE_PG_MASK);
1492#endif
1493 }
1494 else
1495 {
1496 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", HCPhys));
1497 PteDst.u = 0;
1498 }
1499 /** @todo count these two kinds. */
1500 }
1501 else
1502 {
1503#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1504 /*
1505 * If the page or page directory entry is not marked accessed,
1506 * we mark the page not present.
1507 */
1508 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1509 {
1510 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1511 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1512 PteDst.u = 0;
1513 }
1514 else
1515 /*
1516 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1517 * when the page is modified.
1518 */
1519 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1520 {
1521 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1522 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1523 | (HCPhys & X86_PTE_PAE_PG_MASK)
1524 | PGM_PTFLAGS_TRACK_DIRTY;
1525 }
1526 else
1527#endif
1528 {
1529 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1530#if PGM_SHW_TYPE == PGM_TYPE_EPT
1531 PteDst.u = (HCPhys & EPT_PTE_PG_MASK);
1532 PteDst.n.u1Present = 1;
1533 PteDst.n.u1Write = 1;
1534 PteDst.n.u1Execute = 1;
1535 PteDst.n.u1IgnorePAT = 1;
1536 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1537 /* PteDst.n.u1Size = 0 */
1538#else
1539 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1540 | (HCPhys & X86_PTE_PAE_PG_MASK);
1541#endif
1542 }
1543 }
1544
1545#ifdef PGMPOOL_WITH_USER_TRACKING
1546 /*
1547 * Keep user track up to date.
1548 */
1549 if (PteDst.n.u1Present)
1550 {
1551 if (!pPteDst->n.u1Present)
1552 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1553 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1554 {
1555 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1556 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1557 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1558 }
1559 }
1560 else if (pPteDst->n.u1Present)
1561 {
1562 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1563 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1564 }
1565#endif /* PGMPOOL_WITH_USER_TRACKING */
1566
1567 /*
1568 * Update statistics and commit the entry.
1569 */
1570 if (!PteSrc.n.u1Global)
1571 pShwPage->fSeenNonGlobal = true;
1572 *pPteDst = PteDst;
1573 }
1574 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1575 /** @todo count these. */
1576 }
1577 else
1578 {
1579 /*
1580 * Page not-present.
1581 */
1582 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1583#ifdef PGMPOOL_WITH_USER_TRACKING
1584 /* Keep user track up to date. */
1585 if (pPteDst->n.u1Present)
1586 {
1587 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1588 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1589 }
1590#endif /* PGMPOOL_WITH_USER_TRACKING */
1591 pPteDst->u = 0;
1592 /** @todo count these. */
1593 }
1594}
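
/*
 * SyncPageWorker above emulates the guest A and D bits: a page whose PTE or PDE is
 * not yet accessed is left not present, and a clean but writable page is shadowed
 * read-only with PGM_PTFLAGS_TRACK_DIRTY set so that the first write traps into
 * CheckPageFault.  The helper below is a simplified sketch of that decision for plain
 * 32-bit flag words; it ignores the physical address and the extra flags the real
 * shadow PTE carries, and the block is not compiled.
 */
#if 0
# include <stdint.h>

# define F_P           UINT32_C(0x001)
# define F_RW          UINT32_C(0x002)
# define F_A           UINT32_C(0x020)
# define F_D           UINT32_C(0x040)
# define F_TRACK_DIRTY UINT32_C(0x200)                  /* stands in for PGM_PTFLAGS_TRACK_DIRTY */

/* Derive the shadow PTE flags from the guest PDE and PTE flags. */
static inline uint32_t shadowPteFlagsFromGuest(uint32_t uPdeSrc, uint32_t uPteSrc)
{
    if (!(uPteSrc & F_A) || !(uPdeSrc & F_A))
        return 0;                                       /* not accessed -> not present, emulate the A bit */

    uint32_t uShw = uPteSrc & (F_P | F_RW);
    if (!(uPteSrc & F_D) && (uPdeSrc & uPteSrc & F_RW))
    {
        uShw &= ~F_RW;                                  /* write protect so the first write faults */
        uShw |= F_TRACK_DIRTY;                          /* remember why it is read-only */
    }
    return uShw;
}
#endif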
1595
1596
1597/**
1598 * Syncs a guest OS page.
1599 *
1600 * There are no conflicts at this point, neither is there any need for
1601 * page table allocations.
1602 *
1603 * @returns VBox status code.
1604 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1605 * @param pVM VM handle.
1606 * @param PdeSrc Page directory entry of the guest.
1607 * @param GCPtrPage Guest context page address.
1608 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1609 * @param uErr Fault error (X86_TRAP_PF_*).
1610 */
1611PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1612{
1613 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1614
1615#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1616 || PGM_GST_TYPE == PGM_TYPE_PAE \
1617 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1618 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1619 && PGM_SHW_TYPE != PGM_TYPE_EPT
1620
1621# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1622 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1623# endif
1624
1625 /*
1626 * Assert preconditions.
1627 */
1628 Assert(PdeSrc.n.u1Present);
1629 Assert(cPages);
1630 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1631
1632 /*
1633 * Get the shadow PDE, find the shadow page table in the pool.
1634 */
1635# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1636 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1637 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
1638
1639# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1640 /* Fetch the pgm pool shadow descriptor. */
1641 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
1642 Assert(pShwPde);
1643# endif
1644
1645# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1646
1647# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
1648 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1649 PPGMPOOLPAGE pShwPde;
1650 PX86PDPAE pPDDst;
1651
1652 /* Fetch the pgm pool shadow descriptor. */
1653 int rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
1654 AssertRCSuccessReturn(rc, rc);
1655 Assert(pShwPde);
1656
1657 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1658 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1659# else
1660 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm! */;
1661 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1662 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s); NOREF(pPdptDst);
1663 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
1664 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR);
1665# endif
1666# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1667 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1668 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1669 PX86PDPAE pPDDst;
1670 PX86PDPT pPdptDst;
1671
1672 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1673 AssertRCSuccessReturn(rc, rc);
1674 Assert(pPDDst && pPdptDst);
1675 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1676# endif
1677
1678 SHWPDE PdeDst = *pPdeDst;
1679 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1680 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1681
1682# if PGM_GST_TYPE == PGM_TYPE_AMD64
1683 /* Fetch the pgm pool shadow descriptor. */
1684 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1685 Assert(pShwPde);
1686# endif
1687
1688 /*
1689 * Check that the page is present and that the shadow PDE isn't out of sync.
1690 */
1691# if PGM_GST_TYPE == PGM_TYPE_AMD64
1692 const bool fBigPage = PdeSrc.b.u1Size;
1693# else
1694 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1695# endif
1696 RTGCPHYS GCPhys;
1697 if (!fBigPage)
1698 {
1699 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1700# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1701 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1702 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1703# endif
1704 }
1705 else
1706 {
1707 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1708# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1709 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1710 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1711# endif
1712 }
1713 if ( pShwPage->GCPhys == GCPhys
1714 && PdeSrc.n.u1Present
1715 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1716 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1717# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1718 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1719# endif
1720 )
1721 {
1722 /*
1723 * Check that the PDE is marked accessed already.
1724 * Since we set the accessed bit *before* getting here on a #PF, this
1725 * check is only meant for dealing with non-#PF'ing paths.
1726 */
1727 if (PdeSrc.n.u1Accessed)
1728 {
1729 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1730 if (!fBigPage)
1731 {
1732 /*
1733 * 4KB Page - Map the guest page table.
1734 */
1735 PGSTPT pPTSrc;
1736 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1737 if (RT_SUCCESS(rc))
1738 {
1739# ifdef PGM_SYNC_N_PAGES
1740 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1741 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1742 {
1743 /*
1744 * This code path is currently only taken when the caller is PGMTrap0eHandler
1745 * for non-present pages!
1746 *
1747 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1748 * deal with locality.
1749 */
1750 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1751# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1752 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1753 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1754# else
1755 const unsigned offPTSrc = 0;
1756# endif
1757 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1758 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1759 iPTDst = 0;
1760 else
1761 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1762 for (; iPTDst < iPTDstEnd; iPTDst++)
1763 {
1764 if (!pPTDst->a[iPTDst].n.u1Present)
1765 {
1766 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1767 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1768 NOREF(GCPtrCurPage);
1769#ifndef IN_RING0
1770 /*
1771 * Assuming kernel code will be marked as supervisor - and not as user level
1772 * and executed using a conforming code selector - and marked as read-only.
1773 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1774 */
1775 PPGMPAGE pPage;
1776 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1777 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1778 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1779 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1780 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1781 )
1782#endif /* else: CSAM not active */
1783 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1784 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1785 GCPtrCurPage, PteSrc.n.u1Present,
1786 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1787 PteSrc.n.u1User & PdeSrc.n.u1User,
1788 (uint64_t)PteSrc.u,
1789 (uint64_t)pPTDst->a[iPTDst].u,
1790 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1791 }
1792 }
1793 }
1794 else
1795# endif /* PGM_SYNC_N_PAGES */
1796 {
1797 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1798 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1799 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1800 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1801 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1802 GCPtrPage, PteSrc.n.u1Present,
1803 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1804 PteSrc.n.u1User & PdeSrc.n.u1User,
1805 (uint64_t)PteSrc.u,
1806 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1807 }
1808 }
1809 else /* MMIO or invalid page: emulated in #PF handler. */
1810 {
1811 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1812 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1813 }
1814 }
1815 else
1816 {
1817 /*
1818 * 4/2MB page - lazily syncing shadow 4K pages.
1819 * (There are many causes of getting here, it's no longer only CSAM.)
1820 */
1821 /* Calculate the GC physical address of this 4KB shadow page. */
1822 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1823 /* Find ram range. */
1824 PPGMPAGE pPage;
1825 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1826 if (RT_SUCCESS(rc))
1827 {
1828 /*
1829 * Make shadow PTE entry.
1830 */
1831 const RTHCPHYS HCPhys = pPage->HCPhys; /** @todo PAGE FLAGS */
1832 SHWPTE PteDst;
1833 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1834 | (HCPhys & X86_PTE_PAE_PG_MASK);
1835 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1836 {
1837 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1838 PteDst.n.u1Write = 0;
1839 else
1840 PteDst.u = 0;
1841 }
1842 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1843# ifdef PGMPOOL_WITH_USER_TRACKING
1844 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1845 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst);
1846# endif
1847 pPTDst->a[iPTDst] = PteDst;
1848
1849
1850 /*
1851 * If the page is not flagged as dirty and is writable, then make it read-only
1852 * at PD level, so we can set the dirty bit when the page is modified.
1853 *
1854 * ASSUMES that page access handlers are implemented on page table entry level.
1855 * Thus we will first catch the dirty access and set PDE.D and restart. If
1856 * there is an access handler, we'll trap again and let it work on the problem.
1857 */
1858 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1859 * As for invlpg, it simply frees the whole shadow PT.
1860 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1861 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1862 {
1863 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1864 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1865 PdeDst.n.u1Write = 0;
1866 }
1867 else
1868 {
1869 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1870 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1871 }
1872 *pPdeDst = PdeDst;
1873 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1874 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1875 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1876 }
1877 else
1878 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1879 }
1880 return VINF_SUCCESS;
1881 }
1882 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1883 }
1884 else
1885 {
1886 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1887 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1888 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1889 }
1890
1891 /*
1892 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1893 * Yea, I'm lazy.
1894 */
1895 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1896# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
1897 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1898# else
1899 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPDDst);
1900# endif
1901
1902 pPdeDst->u = 0;
1903 PGM_INVL_GUEST_TLBS();
1904 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1905
1906#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1907 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1908 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
1909
1910# ifdef PGM_SYNC_N_PAGES
1911 /*
1912 * Get the shadow PDE, find the shadow page table in the pool.
1913 */
1914# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1915 X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
1916
1917# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1918 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
1919
1920# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1921 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1922 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1923 PX86PDPAE pPDDst;
1924 X86PDEPAE PdeDst;
1925 PX86PDPT pPdptDst;
1926
1927 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1928 AssertRCSuccessReturn(rc, rc);
1929 Assert(pPDDst && pPdptDst);
1930 PdeDst = pPDDst->a[iPDDst];
1931# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1932 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1933 PEPTPD pPDDst;
1934 EPTPDE PdeDst;
1935
1936 int rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, NULL, &pPDDst);
1937 if (rc != VINF_SUCCESS)
1938 {
1939 AssertRC(rc);
1940 return rc;
1941 }
1942 Assert(pPDDst);
1943 PdeDst = pPDDst->a[iPDDst];
1944# endif
1945 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
1946 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1947 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1948
1949 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1950 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1951 {
1952 /*
1953 * This code path is currently only taken when the caller is PGMTrap0eHandler
1954 * for non-present pages!
1955 *
1956 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1957 * deal with locality.
1958 */
1959 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1960 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1961 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1962 iPTDst = 0;
1963 else
1964 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1965 for (; iPTDst < iPTDstEnd; iPTDst++)
1966 {
1967 if (!pPTDst->a[iPTDst].n.u1Present)
1968 {
1969 GSTPTE PteSrc;
1970
1971 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1972
1973 /* Fake the page table entry */
1974 PteSrc.u = GCPtrCurPage;
1975 PteSrc.n.u1Present = 1;
1976 PteSrc.n.u1Dirty = 1;
1977 PteSrc.n.u1Accessed = 1;
1978 PteSrc.n.u1Write = 1;
1979 PteSrc.n.u1User = 1;
1980
1981 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1982
1983 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1984 GCPtrCurPage, PteSrc.n.u1Present,
1985 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1986 PteSrc.n.u1User & PdeSrc.n.u1User,
1987 (uint64_t)PteSrc.u,
1988 (uint64_t)pPTDst->a[iPTDst].u,
1989 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1990 }
1991 else
1992 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
1993 }
1994 }
1995 else
1996# endif /* PGM_SYNC_N_PAGES */
1997 {
1998 GSTPTE PteSrc;
1999 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2000 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2001
2002 /* Fake the page table entry */
2003 PteSrc.u = GCPtrCurPage;
2004 PteSrc.n.u1Present = 1;
2005 PteSrc.n.u1Dirty = 1;
2006 PteSrc.n.u1Accessed = 1;
2007 PteSrc.n.u1Write = 1;
2008 PteSrc.n.u1User = 1;
2009 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2010
2011 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2012 GCPtrPage, PteSrc.n.u1Present,
2013 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2014 PteSrc.n.u1User & PdeSrc.n.u1User,
2015 (uint64_t)PteSrc.u,
2016 (uint64_t)pPTDst->a[iPTDst].u,
2017 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2018 }
2019 return VINF_SUCCESS;
2020
2021#else
2022 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2023 return VERR_INTERNAL_ERROR;
2024#endif
2025}
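
/*
 * Both PGM_SYNC_N_PAGES paths in SyncPage above sync a window of PGM_SYNC_NR_PAGES
 * entries around the faulting one, clamped to the bounds of the shadow page table.
 * The standalone program below reproduces just that clamping arithmetic; the constants
 * are stand-ins and the block is not compiled here.
 */
#if 0
# include <stdio.h>

# define SYNC_NR_PAGES 8                /* stand-in for PGM_SYNC_NR_PAGES */
# define PT_ENTRIES    512              /* stand-in for RT_ELEMENTS(pPTDst->a) */

/* Compute the half-open range [iFirst, iEnd) of entries to sync around iPTFault. */
static void syncWindow(unsigned iPTFault, unsigned *piFirst, unsigned *piEnd)
{
    unsigned iEnd = iPTFault + SYNC_NR_PAGES / 2;
    *piEnd   = iEnd < PT_ENTRIES ? iEnd : PT_ENTRIES;                           /* RT_MIN */
    *piFirst = iPTFault < SYNC_NR_PAGES / 2 ? 0 : iPTFault - SYNC_NR_PAGES / 2;
}

int main(void)
{
    unsigned iFirst, iEnd;
    syncWindow(2, &iFirst, &iEnd);      /* near the start of the table */
    printf("[%u, %u)\n", iFirst, iEnd); /* prints [0, 6)    */
    syncWindow(510, &iFirst, &iEnd);    /* near the end of the table   */
    printf("[%u, %u)\n", iFirst, iEnd); /* prints [506, 512) */
    return 0;
}
#endif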
2026
2027
2028#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2029/**
2030 * Investigate page fault and handle write protection page faults caused by
2031 * dirty bit tracking.
2032 *
2033 * @returns VBox status code.
2034 * @param pVM VM handle.
2035 * @param uErr Page fault error code.
2036 * @param pPdeDst Shadow page directory entry.
2037 * @param pPdeSrc Guest page directory entry.
2038 * @param GCPtrPage Guest context page address.
2039 */
2040PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2041{
2042 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
2043 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2044 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2045# if PGM_GST_TYPE == PGM_TYPE_AMD64
2046 bool fBigPagesSupported = true;
2047# else
2048 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2049# endif
2050# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2051 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
2052# endif
2053 unsigned uPageFaultLevel;
2054 int rc;
2055
2056 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2057 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2058
2059# if PGM_GST_TYPE == PGM_TYPE_PAE \
2060 || PGM_GST_TYPE == PGM_TYPE_AMD64
2061
2062# if PGM_GST_TYPE == PGM_TYPE_AMD64
2063 PX86PML4E pPml4eSrc;
2064 PX86PDPE pPdpeSrc;
2065
2066 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
2067 Assert(pPml4eSrc);
2068
2069 /*
2070 * Real page fault? (PML4E level)
2071 */
2072 if ( (uErr & X86_TRAP_PF_RSVD)
2073 || !pPml4eSrc->n.u1Present
2074 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2075 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2076 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2077 )
2078 {
2079 uPageFaultLevel = 0;
2080 goto l_UpperLevelPageFault;
2081 }
2082 Assert(pPdpeSrc);
2083
2084# else /* PAE */
2085 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVM->pgm.s, GCPtrPage);
2086# endif /* PAE */
2087
2088 /*
2089 * Real page fault? (PDPE level)
2090 */
2091 if ( (uErr & X86_TRAP_PF_RSVD)
2092 || !pPdpeSrc->n.u1Present
2093# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2094 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2095 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2096 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2097# endif
2098 )
2099 {
2100 uPageFaultLevel = 1;
2101 goto l_UpperLevelPageFault;
2102 }
2103# endif
2104
2105 /*
2106 * Real page fault? (PDE level)
2107 */
2108 if ( (uErr & X86_TRAP_PF_RSVD)
2109 || !pPdeSrc->n.u1Present
2110# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2111 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2112# endif
2113 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2114 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2115 {
2116 uPageFaultLevel = 2;
2117 goto l_UpperLevelPageFault;
2118 }
2119
2120 /*
2121 * First check the easy case where the page directory has been marked read-only to track
2122 * the dirty bit of an emulated BIG page.
2123 */
2124 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2125 {
2126 /* Mark guest page directory as accessed */
2127# if PGM_GST_TYPE == PGM_TYPE_AMD64
2128 pPml4eSrc->n.u1Accessed = 1;
2129 pPdpeSrc->lm.u1Accessed = 1;
2130# endif
2131 pPdeSrc->b.u1Accessed = 1;
2132
2133 /*
2134 * Only write protection page faults are relevant here.
2135 */
2136 if (fWriteFault)
2137 {
2138 /* Mark guest page directory as dirty (BIG page only). */
2139 pPdeSrc->b.u1Dirty = 1;
2140
2141 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2142 {
2143 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2144
2145 Assert(pPdeSrc->b.u1Write);
2146
2147 pPdeDst->n.u1Write = 1;
2148 pPdeDst->n.u1Accessed = 1;
2149 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2150 PGM_INVL_BIG_PG(GCPtrPage);
2151 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2152 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2153 }
2154 }
2155 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2156 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2157 }
2158 /* else: 4KB page table */
2159
2160 /*
2161 * Map the guest page table.
2162 */
2163 PGSTPT pPTSrc;
2164 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2165 if (RT_SUCCESS(rc))
2166 {
2167 /*
2168 * Real page fault?
2169 */
2170 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2171 const GSTPTE PteSrc = *pPteSrc;
2172 if ( !PteSrc.n.u1Present
2173# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2174 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2175# endif
2176 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2177 || (fUserLevelFault && !PteSrc.n.u1User)
2178 )
2179 {
2180 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2181 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2182 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2183
2184 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2185 * See the 2nd case above as well.
2186 */
2187 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2188 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2189
2190 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2191 return VINF_EM_RAW_GUEST_TRAP;
2192 }
2193 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2194
2195 /*
2196 * Set the accessed bits in the page directory and the page table.
2197 */
2198# if PGM_GST_TYPE == PGM_TYPE_AMD64
2199 pPml4eSrc->n.u1Accessed = 1;
2200 pPdpeSrc->lm.u1Accessed = 1;
2201# endif
2202 pPdeSrc->n.u1Accessed = 1;
2203 pPteSrc->n.u1Accessed = 1;
2204
2205 /*
2206 * Only write protection page faults are relevant here.
2207 */
2208 if (fWriteFault)
2209 {
2210 /* Write access, so mark guest entry as dirty. */
2211# ifdef VBOX_WITH_STATISTICS
2212 if (!pPteSrc->n.u1Dirty)
2213 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2214 else
2215 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2216# endif
2217
2218 pPteSrc->n.u1Dirty = 1;
2219
2220 if (pPdeDst->n.u1Present)
2221 {
2222#ifndef IN_RING0
2223 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2224 * Our individual shadow handlers will provide more information and force a fatal exit.
2225 */
2226 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2227 {
2228 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2229 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2230 return VINF_SUCCESS;
2231 }
2232#endif
2233 /*
2234 * Map shadow page table.
2235 */
2236 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2237 if (pShwPage)
2238 {
2239 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2240 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2241 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2242 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2243 {
2244 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2245# ifdef VBOX_STRICT
2246 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2247 if (pPage)
2248 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2249 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2250# endif
2251 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2252
2253 Assert(pPteSrc->n.u1Write);
2254
2255 pPteDst->n.u1Write = 1;
2256 pPteDst->n.u1Dirty = 1;
2257 pPteDst->n.u1Accessed = 1;
2258 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2259 PGM_INVL_PG(GCPtrPage);
2260
2261 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2262 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2263 }
2264 }
2265 else
2266 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2267 }
2268 }
2269/** @todo Optimize accessed bit emulation? */
2270# ifdef VBOX_STRICT
2271 /*
2272 * Sanity check.
2273 */
2274 else if ( !pPteSrc->n.u1Dirty
2275 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2276 && pPdeDst->n.u1Present)
2277 {
2278 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2279 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2280 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2281 if ( pPteDst->n.u1Present
2282 && pPteDst->n.u1Write)
2283 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2284 }
2285# endif /* VBOX_STRICT */
2286 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2287 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2288 }
2289 AssertRC(rc);
2290 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2291 return rc;
2292
2293
2294l_UpperLevelPageFault:
2295 /*
2296 * Pagefault detected while checking the PML4E, PDPE or PDE.
2297 * Single exit handler to get rid of duplicate code paths.
2298 */
2299 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2300 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2301 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2302
2303 if (
2304# if PGM_GST_TYPE == PGM_TYPE_AMD64
2305 pPml4eSrc->n.u1Present &&
2306# endif
2307# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2308 pPdpeSrc->n.u1Present &&
2309# endif
2310 pPdeSrc->n.u1Present)
2311 {
2312 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2313 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2314 {
2315 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2316 }
2317 else
2318 {
2319 /*
2320 * Map the guest page table.
2321 */
2322 PGSTPT pPTSrc;
2323 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2324 if (RT_SUCCESS(rc))
2325 {
2326 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2327 const GSTPTE PteSrc = *pPteSrc;
2328 if (pPteSrc->n.u1Present)
2329 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2330 }
2331 AssertRC(rc);
2332 }
2333 }
2334 return VINF_EM_RAW_GUEST_TRAP;
2335}
2336#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
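
/*
 * The write-fault half of the dirty-bit emulation implemented by CheckPageFault above,
 * condensed to 32-bit flag words: the guest entry always gets A and D set, and if the
 * shadow entry was only read-only because of the tracking flag, write access is
 * restored and the faulting instruction can simply be restarted.  Bit and return-code
 * names are local stand-ins, not the real VBox symbols; the block is not compiled.
 */
#if 0
# include <stdint.h>

# define F_RW          UINT32_C(0x002)
# define F_A           UINT32_C(0x020)
# define F_D           UINT32_C(0x040)
# define F_TRACK_DIRTY UINT32_C(0x200)

enum { HANDLED_DIRTY_FAULT = 0, NO_DIRTY_TRACKING = 1 };

static inline int handleDirtyWriteFault(uint32_t *puPteGst, uint32_t *puPteShw)
{
    *puPteGst |= F_A | F_D;                     /* the guest sees its A and D bits set */
    if (*puPteShw & F_TRACK_DIRTY)
    {
        /* The fault only happened because of the tracking; restore write access,
           clear the marker and flush the TLB entry (PGM_INVL_PG in the real code). */
        *puPteShw |= F_RW | F_A | F_D;
        *puPteShw &= ~F_TRACK_DIRTY;
        return HANDLED_DIRTY_FAULT;             /* ~ VINF_PGM_HANDLED_DIRTY_BIT_FAULT */
    }
    return NO_DIRTY_TRACKING;                   /* ~ VINF_PGM_NO_DIRTY_BIT_TRACKING */
}
#endif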
2337
2338
2339/**
2340 * Sync a shadow page table.
2341 *
2342 * The shadow page table is not present. This includes the case where
2343 * there is a conflict with a mapping.
2344 *
2345 * @returns VBox status code.
2346 * @param pVM VM handle.
2347 * @param iPDSrc Page directory index.
2348 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2349 * Assume this is a temporary mapping.
2350 * @param GCPtrPage GC pointer of the page that caused the fault.
2351 */
2352PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2353{
2354 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2355 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPtPD[iPDSrc]);
2356 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2357
2358#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2359 || PGM_GST_TYPE == PGM_TYPE_PAE \
2360 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2361 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2362 && PGM_SHW_TYPE != PGM_TYPE_EPT
2363
2364 int rc = VINF_SUCCESS;
2365
2366 /*
2367 * Validate input a little bit.
2368 */
2369 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2370# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2371 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2372 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2373
2374# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2375 /* Fetch the pgm pool shadow descriptor. */
2376 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2377 Assert(pShwPde);
2378# endif
2379
2380# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2381# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2382 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2383 PPGMPOOLPAGE pShwPde;
2384 PX86PDPAE pPDDst;
2385 PSHWPDE pPdeDst;
2386
2387 /* Fetch the pgm pool shadow descriptor. */
2388 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2389 AssertRCSuccessReturn(rc, rc);
2390 Assert(pShwPde);
2391
2392 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2393 pPdeDst = &pPDDst->a[iPDDst];
2394# else
2395 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm! */;
2396 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT); NOREF(iPdpt);
2397 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s); NOREF(pPdptDst);
2398 PSHWPDE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
2399# endif
2400# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2401 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2402 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2403 PX86PDPAE pPDDst;
2404 PX86PDPT pPdptDst;
2405 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2406 AssertRCSuccessReturn(rc, rc);
2407 Assert(pPDDst);
2408 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2409# endif
2410 SHWPDE PdeDst = *pPdeDst;
2411
2412# if PGM_GST_TYPE == PGM_TYPE_AMD64
2413 /* Fetch the pgm pool shadow descriptor. */
2414 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2415 Assert(pShwPde);
2416# endif
2417
2418# ifndef PGM_WITHOUT_MAPPINGS
2419 /*
2420 * Check for conflicts.
2421 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2422 * HC: Simply resolve the conflict.
2423 */
2424 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2425 {
2426 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2427# ifndef IN_RING3
2428 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2429 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2430 return VERR_ADDRESS_CONFLICT;
2431# else
2432 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2433 Assert(pMapping);
2434# if PGM_GST_TYPE == PGM_TYPE_32BIT
2435 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2436# elif PGM_GST_TYPE == PGM_TYPE_PAE
2437 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2438# else
2439 AssertFailed(); /* can't happen for amd64 */
2440# endif
2441 if (RT_FAILURE(rc))
2442 {
2443 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2444 return rc;
2445 }
2446 PdeDst = *pPdeDst;
2447# endif
2448 }
2449# else /* PGM_WITHOUT_MAPPINGS */
2450 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2451# endif /* PGM_WITHOUT_MAPPINGS */
2452 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2453
2454 /*
2455 * Sync page directory entry.
2456 */
2457 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2458 if (PdeSrc.n.u1Present)
2459 {
2460 /*
2461 * Allocate & map the page table.
2462 */
2463 PSHWPT pPTDst;
2464# if PGM_GST_TYPE == PGM_TYPE_AMD64
2465 const bool fPageTable = !PdeSrc.b.u1Size;
2466# else
2467 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2468# endif
2469 PPGMPOOLPAGE pShwPage;
2470 RTGCPHYS GCPhys;
2471 if (fPageTable)
2472 {
2473 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2474# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2475 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2476 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2477# endif
2478# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2479 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2480# else
2481 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2482# endif
2483 }
2484 else
2485 {
2486 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2487# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2488 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2489 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2490# endif
2491# if PGM_GST_TYPE == PGM_TYPE_AMD64 || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2492 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2493# else
2494 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2495# endif
2496 }
2497 if (rc == VINF_SUCCESS)
2498 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2499 else if (rc == VINF_PGM_CACHED_PAGE)
2500 {
2501 /*
2502 * The PT was cached, just hook it up.
2503 */
2504 if (fPageTable)
2505 PdeDst.u = pShwPage->Core.Key
2506 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2507 else
2508 {
2509 PdeDst.u = pShwPage->Core.Key
2510 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2511 /* (see explanation and assumptions further down.) */
2512 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2513 {
2514 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2515 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2516 PdeDst.b.u1Write = 0;
2517 }
2518 }
2519 *pPdeDst = PdeDst;
2520 return VINF_SUCCESS;
2521 }
2522 else if (rc == VERR_PGM_POOL_FLUSHED)
2523 {
2524 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
2525 return VINF_PGM_SYNC_CR3;
2526 }
2527 else
2528 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2529 PdeDst.u &= X86_PDE_AVL_MASK;
2530 PdeDst.u |= pShwPage->Core.Key;
2531
2532 /*
2533 * Page directory has been accessed (this is a fault situation, remember).
2534 */
2535 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2536 if (fPageTable)
2537 {
2538 /*
2539 * Page table - 4KB.
2540 *
2541 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2542 */
2543 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2544 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2545 PGSTPT pPTSrc;
2546 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2547 if (RT_SUCCESS(rc))
2548 {
2549 /*
2550 * Start by syncing the page directory entry so CSAM's TLB trick works.
2551 */
2552 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2553 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2554 *pPdeDst = PdeDst;
2555
2556 /*
2557 * Directory/page user or supervisor privilege: (same goes for read/write)
2558 *
2559 * Directory    Page    Combined
2560 *   U/S        U/S       U/S
2561 *    0          0         0
2562 *    0          1         0
2563 *    1          0         0
2564 *    1          1         1
2565 *
2566 * Simple AND operation. Table listed for completeness; see the sketch after this function.
2567 *
2568 */
2569 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2570# ifdef PGM_SYNC_N_PAGES
2571 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2572 unsigned iPTDst = iPTBase;
2573 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2574 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2575 iPTDst = 0;
2576 else
2577 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2578# else /* !PGM_SYNC_N_PAGES */
2579 unsigned iPTDst = 0;
2580 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2581# endif /* !PGM_SYNC_N_PAGES */
2582# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2583 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2584 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2585# else
2586 const unsigned offPTSrc = 0;
2587# endif
2588 for (; iPTDst < iPTDstEnd; iPTDst++)
2589 {
2590 const unsigned iPTSrc = iPTDst + offPTSrc;
2591 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2592
2593 if (PteSrc.n.u1Present) /* we've already cleared it above */
2594 {
2595# ifndef IN_RING0
2596 /*
2597 * Assuming kernel code will be marked as supervisor - and not as user level
2598 * and executed using a conforming code selector - and marked as read-only.
2599 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2600 */
2601 PPGMPAGE pPage;
2602 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2603 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2604 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2605 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2606 )
2607# endif
2608 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2609 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2610 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2611 PteSrc.n.u1Present,
2612 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2613 PteSrc.n.u1User & PdeSrc.n.u1User,
2614 (uint64_t)PteSrc.u,
2615 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2616 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2617 }
2618 } /* for PTEs */
2619 }
2620 }
2621 else
2622 {
2623 /*
2624 * Big page - 2/4MB.
2625 *
2626 * We'll walk the ram range list in parallel and optimize lookups.
2627 * We will only sync one shadow page table at a time.
2628 */
2629 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2630
2631 /**
2632 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2633 */
2634
2635 /*
2636 * Start by syncing the page directory entry.
2637 */
2638 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2639 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2640
2641 /*
2642 * If the page is not flagged as dirty and is writable, then make it read-only
2643 * at PD level, so we can set the dirty bit when the page is modified.
2644 *
2645 * ASSUMES that page access handlers are implemented on page table entry level.
2646 * Thus we will first catch the dirty access and set PDE.D and restart. If
2647 * there is an access handler, we'll trap again and let it work on the problem.
2648 */
2649 /** @todo move the above stuff to a section in the PGM documentation. */
2650 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2651 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2652 {
2653 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2654 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2655 PdeDst.b.u1Write = 0;
2656 }
2657 *pPdeDst = PdeDst;
2658
2659 /*
2660 * Fill the shadow page table.
2661 */
2662 /* Get address and flags from the source PDE. */
2663 SHWPTE PteDstBase;
2664 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2665
2666 /* Loop thru the entries in the shadow PT. */
2667 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2668 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2669 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2670 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2671 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2672 unsigned iPTDst = 0;
2673 while (iPTDst < RT_ELEMENTS(pPTDst->a))
2674 {
2675 /* Advance ram range list. */
2676 while (pRam && GCPhys > pRam->GCPhysLast)
2677 pRam = pRam->CTX_SUFF(pNext);
2678 if (pRam && GCPhys >= pRam->GCPhys)
2679 {
2680 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2681 do
2682 {
2683 /* Make shadow PTE. */
2684 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2685 SHWPTE PteDst;
2686
2687 /* Make sure the RAM has already been allocated. */
2688 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2689 {
2690 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2691 {
2692# ifdef IN_RING3
2693 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2694# else
2695 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2696# endif
2697 if (rc != VINF_SUCCESS)
2698 return rc;
2699 }
2700 }
2701
2702 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2703 {
2704 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2705 {
2706 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2707 PteDst.n.u1Write = 0;
2708 }
2709 else
2710 PteDst.u = 0;
2711 }
2712# ifndef IN_RING0
2713 /*
2714 * Assuming kernel code will be marked as supervisor and not as user level and executed
2715 * using a conforming code selector. Don't check for readonly, as that implies the whole
2716 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2717 */
2718 else if ( !PdeSrc.n.u1User
2719 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2720 PteDst.u = 0;
2721# endif
2722 else
2723 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2724# ifdef PGMPOOL_WITH_USER_TRACKING
2725 if (PteDst.n.u1Present)
2726 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, pPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT, pPage, iPTDst); /** @todo PAGE FLAGS */
2727# endif
2728 /* commit it */
2729 pPTDst->a[iPTDst] = PteDst;
2730 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2731 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2732 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2733
2734 /* advance */
2735 GCPhys += PAGE_SIZE;
2736 iHCPage++;
2737 iPTDst++;
2738 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2739 && GCPhys <= pRam->GCPhysLast);
2740 }
2741 else if (pRam)
2742 {
2743 Log(("Invalid pages at %RGp\n", GCPhys));
2744 do
2745 {
2746 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2747 GCPhys += PAGE_SIZE;
2748 iPTDst++;
2749 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2750 && GCPhys < pRam->GCPhys);
2751 }
2752 else
2753 {
2754 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2755 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2756 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2757 }
2758 } /* while more PTEs */
2759 } /* 4KB / 4MB */
2760 }
2761 else
2762 AssertRelease(!PdeDst.n.u1Present);
2763
2764 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2765 if (RT_FAILURE(rc))
2766 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2767 return rc;
2768
2769#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2770 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2771 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2772
2773
2774 /*
2775 * Validate input a little bit.
2776 */
2777 int rc = VINF_SUCCESS;
2778# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2779 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2780 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2781
2782# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2783 /* Fetch the pgm pool shadow descriptor. */
2784 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2785 Assert(pShwPde);
2786# endif
2787
2788# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2789# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2790 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2791 PPGMPOOLPAGE pShwPde;
2792 PX86PDPAE pPDDst;
2793 PSHWPDE pPdeDst;
2794
2795 /* Fetch the pgm pool shadow descriptor. */
2796 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2797 AssertRCSuccessReturn(rc, rc);
2798 Assert(pShwPde);
2799
2800 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2801 pPdeDst = &pPDDst->a[iPDDst];
2802# else
2803 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) /*& SHW_PD_MASK - only pool index atm!*/;
2804 PX86PDEPAE pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
2805# endif
2806
2807# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2808 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2809 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2810 PX86PDPAE pPDDst;
2811 PX86PDPT pPdptDst;
2812 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2813 AssertRCSuccessReturn(rc, rc);
2814 Assert(pPDDst);
2815 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2816
2817 /* Fetch the pgm pool shadow descriptor. */
2818 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2819 Assert(pShwPde);
2820
2821# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2822 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2823 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2824 PEPTPD pPDDst;
2825 PEPTPDPT pPdptDst;
2826
2827 rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2828 if (rc != VINF_SUCCESS)
2829 {
2830 AssertRC(rc);
2831 return rc;
2832 }
2833 Assert(pPDDst);
2834 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2835
2836 /* Fetch the pgm pool shadow descriptor. */
2837 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2838 Assert(pShwPde);
2839# endif
2840 SHWPDE PdeDst = *pPdeDst;
2841
2842 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2843 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2844
2845 GSTPDE PdeSrc;
2846 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2847 PdeSrc.n.u1Present = 1;
2848 PdeSrc.n.u1Write = 1;
2849 PdeSrc.n.u1Accessed = 1;
2850 PdeSrc.n.u1User = 1;
2851
2852 /*
2853 * Allocate & map the page table.
2854 */
2855 PSHWPT pPTDst;
2856 PPGMPOOLPAGE pShwPage;
2857 RTGCPHYS GCPhys;
2858
2859 /* Virtual address = physical address */
2860 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2861# if PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_EPT || defined(VBOX_WITH_PGMPOOL_PAGING_ONLY)
2862 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2863# else
2864 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, SHW_POOL_ROOT_IDX, iPDDst, &pShwPage);
2865# endif
2866
2867 if ( rc == VINF_SUCCESS
2868 || rc == VINF_PGM_CACHED_PAGE)
2869 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2870 else
2871 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2872
2873 PdeDst.u &= X86_PDE_AVL_MASK;
2874 PdeDst.u |= pShwPage->Core.Key;
2875 PdeDst.n.u1Present = 1;
2876 PdeDst.n.u1Write = 1;
2877# if PGM_SHW_TYPE == PGM_TYPE_EPT
2878 PdeDst.n.u1Execute = 1;
2879# else
2880 PdeDst.n.u1User = 1;
2881 PdeDst.n.u1Accessed = 1;
2882# endif
2883 *pPdeDst = PdeDst;
2884
2885 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2886 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2887 return rc;
2888
2889#else
2890 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2891 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2892 return VERR_INTERNAL_ERROR;
2893#endif
2894}
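
/*
 * The privilege table in SyncPT above boils down to ANDing the directory and page
 * level U/S (and R/W) bits to get the effective access rights of a 4KB mapping.  The
 * helper below demonstrates exactly that with the architectural bit values; the
 * function name is hypothetical and the block is not compiled.
 */
#if 0
# include <stdint.h>
# include <assert.h>

# define F_RW UINT32_C(0x2)             /* bit 1: writable */
# define F_US UINT32_C(0x4)             /* bit 2: user accessible */

/* Effective rights of a 4KB mapping: the AND of the PDE and PTE rights. */
static inline uint32_t combinedAccess(uint32_t uPde, uint32_t uPte)
{
    return uPde & uPte & (F_RW | F_US);
}

static inline void combinedAccessSelfTest(void)
{
    assert(combinedAccess(F_US | F_RW, F_US | F_RW) == (F_US | F_RW));  /* 1,1 -> 1 */
    assert(combinedAccess(F_US,        F_RW)        == 0);              /* mixed -> 0 */
    assert(combinedAccess(0,           F_US | F_RW) == 0);              /* 0,1 -> 0 */
}
#endif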
2895
2896
2897
2898/**
2899 * Prefetch a page/set of pages.
2900 *
2901 * Typically used to sync commonly used pages before entering raw mode
2902 * after a CR3 reload.
2903 *
2904 * @returns VBox status code.
2905 * @param pVM VM handle.
2906 * @param GCPtrPage Page to prefetch.
2907 */
2908PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage)
2909{
2910#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2911 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
2912 /*
2913 * Check that all Guest levels thru the PDE are present, getting the
2914 * PD and PDE in the process.
2915 */
2916 int rc = VINF_SUCCESS;
2917# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2918# if PGM_GST_TYPE == PGM_TYPE_32BIT
2919 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
2920 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
2921# elif PGM_GST_TYPE == PGM_TYPE_PAE
2922 unsigned iPDSrc;
2923# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2924 X86PDPE PdpeSrc;
2925 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
2926# else
2927 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, NULL);
2928# endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
2929 if (!pPDSrc)
2930 return VINF_SUCCESS; /* not present */
2931# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2932 unsigned iPDSrc;
2933 PX86PML4E pPml4eSrc;
2934 X86PDPE PdpeSrc;
2935 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2936 if (!pPDSrc)
2937 return VINF_SUCCESS; /* not present */
2938# endif
2939 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2940# else
2941 PGSTPD pPDSrc = NULL;
2942 const unsigned iPDSrc = 0;
2943 GSTPDE PdeSrc;
2944
2945 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2946 PdeSrc.n.u1Present = 1;
2947 PdeSrc.n.u1Write = 1;
2948 PdeSrc.n.u1Accessed = 1;
2949 PdeSrc.n.u1User = 1;
2950# endif
2951
2952 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2953 {
2954# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2955 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
2956# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2957# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
2958 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2959 PX86PDPAE pPDDst;
2960 X86PDEPAE PdeDst;
2961# if PGM_GST_TYPE != PGM_TYPE_PAE
2962 X86PDPE PdpeSrc;
2963
2964 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
2965 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
2966# endif
2967 int rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
2968 if (rc != VINF_SUCCESS)
2969 {
2970 AssertRC(rc);
2971 return rc;
2972 }
2973 Assert(pPDDst);
2974 PdeDst = pPDDst->a[iPDDst];
2975# else
2976 const X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
2977# endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */
2978
2979# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2980 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2981 PX86PDPAE pPDDst;
2982 X86PDEPAE PdeDst;
2983
2984# if PGM_GST_TYPE == PGM_TYPE_PROT
2985 /* AMD-V nested paging */
2986 X86PML4E Pml4eSrc;
2987 X86PDPE PdpeSrc;
2988 PX86PML4E pPml4eSrc = &Pml4eSrc;
2989
2990 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
2991 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
2992 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
2993# endif
2994
2995 int rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
2996 if (rc != VINF_SUCCESS)
2997 {
2998 AssertRC(rc);
2999 return rc;
3000 }
3001 Assert(pPDDst);
3002 PdeDst = pPDDst->a[iPDDst];
3003# endif
3004 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3005 {
3006 if (!PdeDst.n.u1Present)
3007 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3008 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3009 else
3010 {
3011 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3012 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3013 * makes no sense to prefetch more than one page.
3014 */
3015 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3016 if (RT_SUCCESS(rc))
3017 rc = VINF_SUCCESS;
3018 }
3019 }
3020 }
3021 return rc;
3022
3023#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3024 return VINF_SUCCESS; /* ignore */
3025#endif
3026}
3027
3028
3029
3030
3031/**
3032 * Syncs a page during a PGMVerifyAccess() call.
3033 *
3034 * @returns VBox status code (informational status codes included).
 * @param pVM The virtual machine.
3035 * @param GCPtrPage The address of the page to sync.
3036 * @param fPage The effective guest page flags.
3037 * @param uErr The trap error code.
3038 */
3039PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3040{
3041 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3042
3043 Assert(!HWACCMIsNestedPagingActive(pVM));
3044#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3045 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3046
3047# ifndef IN_RING0
3048 if (!(fPage & X86_PTE_US))
3049 {
3050 /*
3051 * Mark this page as safe.
3052 */
3053 /** @todo not correct for pages that contain both code and data!! */
3054 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3055 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3056 }
3057# endif
3058
3059 /*
3060 * Get guest PD and index.
3061 */
3062# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3063# if PGM_GST_TYPE == PGM_TYPE_32BIT
3064 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3065 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3066# elif PGM_GST_TYPE == PGM_TYPE_PAE
3067 unsigned iPDSrc;
3068# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3069 X86PDPE PdpeSrc;
3070 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3071# else
3072 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, NULL);
3073# endif
3074
3075 if (!pPDSrc)
3076 {
3077 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3078 return VINF_EM_RAW_GUEST_TRAP;
3079 }
3080# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3081 unsigned iPDSrc;
3082 PX86PML4E pPml4eSrc;
3083 X86PDPE PdpeSrc;
3084 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3085 if (!pPDSrc)
3086 {
3087 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3088 return VINF_EM_RAW_GUEST_TRAP;
3089 }
3090# endif
3091# else
3092 PGSTPD pPDSrc = NULL;
3093 const unsigned iPDSrc = 0;
3094# endif
3095 int rc = VINF_SUCCESS;
3096
3097 /*
3098 * First check if the shadow PD is present.
3099 */
3100# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3101 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
3102# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3103 PX86PDEPAE pPdeDst;
3104# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3105 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3106 PX86PDPAE pPDDst;
3107# if PGM_GST_TYPE != PGM_TYPE_PAE
3108 X86PDPE PdpeSrc;
3109
3110 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3111 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3112# endif
3113 rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3114 if (rc != VINF_SUCCESS)
3115 {
3116 AssertRC(rc);
3117 return rc;
3118 }
3119 Assert(pPDDst);
3120 pPdeDst = &pPDDst->a[iPDDst];
3121# else
3122 pPdeDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrPage);
3123# endif
3124# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3125 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3126 PX86PDPAE pPDDst;
3127 PX86PDEPAE pPdeDst;
3128
3129# if PGM_GST_TYPE == PGM_TYPE_PROT
3130 /* AMD-V nested paging */
3131 X86PML4E Pml4eSrc;
3132 X86PDPE PdpeSrc;
3133 PX86PML4E pPml4eSrc = &Pml4eSrc;
3134
3135 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3136 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3137 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3138# endif
3139
3140 rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3141 if (rc != VINF_SUCCESS)
3142 {
3143 AssertRC(rc);
3144 return rc;
3145 }
3146 Assert(pPDDst);
3147 pPdeDst = &pPDDst->a[iPDDst];
3148# endif
3149 if (!pPdeDst->n.u1Present)
3150 {
3151 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3152 AssertRC(rc);
3153 if (rc != VINF_SUCCESS)
3154 return rc;
3155 }
3156
3157# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3158 /* Check for dirty bit fault */
3159 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3160 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3161 Log(("PGMVerifyAccess: success (dirty)\n"));
3162 else
3163 {
3164 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3165#else
3166 {
3167 GSTPDE PdeSrc;
3168 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3169 PdeSrc.n.u1Present = 1;
3170 PdeSrc.n.u1Write = 1;
3171 PdeSrc.n.u1Accessed = 1;
3172 PdeSrc.n.u1User = 1;
3173
3174#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3175 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3176 if (uErr & X86_TRAP_PF_US)
3177 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3178 else /* supervisor */
3179 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3180
3181 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3182 if (RT_SUCCESS(rc))
3183 {
3184 /* Page was successfully synced */
3185 Log2(("PGMVerifyAccess: success (sync)\n"));
3186 rc = VINF_SUCCESS;
3187 }
3188 else
3189 {
3190 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3191 return VINF_EM_RAW_GUEST_TRAP;
3192 }
3193 }
3194 return rc;
3195
3196#else /* unsupported guest/shadow paging combination */
3197
3198 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3199 return VERR_INTERNAL_ERROR;
3200#endif /* unsupported guest/shadow paging combination */
3201}
3202
3203
3204#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3205# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3206/**
3207 * Figures out which kind of shadow page this guest PDE warrants.
3208 *
3209 * @returns Shadow page kind.
3210 * @param pPdeSrc The guest PDE in question.
3211 * @param cr4 The current guest cr4 value.
3212 */
3213DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3214{
3215# if PGM_GST_TYPE == PGM_TYPE_AMD64
3216 if (!pPdeSrc->n.u1Size)
3217# else
3218 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3219# endif
3220 return BTH_PGMPOOLKIND_PT_FOR_PT;
3221 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3222 //{
3223 // case 0:
3224 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3225 // case X86_PDE4M_RW:
3226 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3227 // case X86_PDE4M_US:
3228 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3229 // case X86_PDE4M_RW | X86_PDE4M_US:
3230 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3231# if 0
3232 // case X86_PDE4M_PAE_NX:
3233 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3234 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3235 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3236 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3237 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3238 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3239 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3240# endif
3241 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3242 //}
3243}
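/* Note: CalcPageKind() is consulted by the SyncCR3 code below (in the
 * !VBOX_WITH_PGMPOOL_PAGING_ONLY path) when deciding whether a cached shadow page
 * table still matches the guest PDE (see the pShwPage->enmKind check). */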
3244# endif
3245#endif
3246
3247#undef MY_STAM_COUNTER_INC
3248#define MY_STAM_COUNTER_INC(a) do { } while (0)
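/* Note: the redefinition above turns every MY_STAM_COUNTER_INC() in SyncCR3 below into
 * a no-op, i.e. these statistics are currently disabled. */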
3249
3250
3251/**
3252 * Syncs the paging hierarchy starting at CR3.
3253 *
3254 * @returns VBox status code, no specials.
3255 * @param pVM The virtual machine.
3256 * @param cr0 Guest context CR0 register
3257 * @param cr3 Guest context CR3 register
3258 * @param cr4 Guest context CR4 register
3259 * @param fGlobal Including global page directories or not
3260 */
3261PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3262{
3263 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3264 fGlobal = true; /* Change this CR3 reload to be a global one. */
3265
3266 LogFlow(("SyncCR3 %d\n", fGlobal));
3267
3268#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3269 /*
3270 * Update page access handlers.
3271 * The virtual handlers are always flushed, while the physical handlers are only flushed on demand.
3272 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3273 * have to look into that later because it will have a bad influence on performance.
3274 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3275 * bird: Yes, but that won't work for aliases.
3276 */
3277 /** @todo this MUST go away. See #1557. */
3278 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3279 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3280 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3281#endif
3282
3283#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3284 /*
3285 * Nested / EPT - almost no work.
3286 */
3287 /** @todo check if this is really necessary; the call does it as well... */
3288 HWACCMFlushTLB(pVM);
3289 return VINF_SUCCESS;
3290
3291#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3292 /*
3293 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3294 * out the shadow parts when the guest modifies its tables.
3295 */
3296 return VINF_SUCCESS;
3297
3298#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3299
3300# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3301 /* Nothing to do when mappings are fixed. */
3302 if (pVM->pgm.s.fMappingsFixed)
3303 return VINF_SUCCESS;
3304# endif
3305
3306 /*
3307 * PAE and 32-bit legacy mode (shadow).
3308 * (Guest PAE, 32-bit legacy, protected and real modes.)
3309 */
3310 Assert(fGlobal || (cr4 & X86_CR4_PGE));
3311 MY_STAM_COUNTER_INC(fGlobal ? &pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Global) : &pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3NotGlobal));
3312
3313# if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE
3314 bool const fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3315
3316 /*
3317 * Get page directory addresses.
3318 */
3319# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3320 PX86PDE pPDEDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, 0);
3321# else /* PGM_SHW_TYPE == PGM_TYPE_PAE */
3322# if PGM_GST_TYPE == PGM_TYPE_32BIT
3323 PX86PDEPAE pPDEDst = NULL;
3324# endif
3325# endif
3326
3327# if PGM_GST_TYPE == PGM_TYPE_32BIT
3328 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3329 Assert(pPDSrc);
3330# if !defined(IN_RC) && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
3331 Assert(PGMPhysGCPhys2R3PtrAssert(pVM, (RTGCPHYS)(cr3 & GST_CR3_PAGE_MASK), sizeof(*pPDSrc)) == (RTR3PTR)pPDSrc);
3332# endif
3333# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3334
3335 /*
3336 * Iterate the CR3 page.
3337 */
3338 PPGMMAPPING pMapping;
3339 unsigned iPdNoMapping;
3340 const bool fRawR0Enabled = EMIsRawRing0Enabled(pVM);
3341 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3342
3343# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3344 /* Mappings are always enabled when we get here. */
3345 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3346 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3347 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
3348# else
3349 /* Only check mappings if they are supposed to be put into the shadow page table. */
3350 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
3351 {
3352 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3353 iPdNoMapping = (pMapping) ? (pMapping->GCPtr >> GST_PD_SHIFT) : ~0U;
3354 }
3355 else
3356 {
3357 pMapping = 0;
3358 iPdNoMapping = ~0U;
3359 }
3360# endif
3361
3362# if PGM_GST_TYPE == PGM_TYPE_PAE
3363 for (uint64_t iPdpt = 0; iPdpt < GST_PDPE_ENTRIES; iPdpt++)
3364 {
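 /* Fetch the guest page directory belonging to this PDPT entry together with the
 * corresponding shadow PD pointer and the shadow PDPT before walking its PDEs. */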
3365 unsigned iPDSrc;
3366 X86PDPE PdpeSrc;
3367 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, iPdpt << X86_PDPT_SHIFT, &iPDSrc, &PdpeSrc);
3368 PX86PDEPAE pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, iPdpt << X86_PDPT_SHIFT);
3369 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3370
3371# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
3372 if (pPDSrc == NULL)
3373 {
3374 /* PDPE not present */
3375 if (pPdptDst->a[iPdpt].n.u1Present)
3376 {
3377 LogFlow(("SyncCR3: guest PDPE %lld not present; clear shw pdpe\n", iPdpt));
3378 /* for each page directory entry */
3379 for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++)
3380 {
3381 if ( pPDEDst[iPD].n.u1Present
3382 && !(pPDEDst[iPD].u & PGM_PDFLAGS_MAPPING))
3383 {
3384 pgmPoolFree(pVM, pPDEDst[iPD].u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdpt * X86_PG_PAE_ENTRIES + iPD);
3385 pPDEDst[iPD].u = 0;
3386 }
3387 }
3388 }
3389 if (!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING))
3390 pPdptDst->a[iPdpt].n.u1Present = 0;
3391 continue;
3392 }
3393# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
3394# else /* PGM_GST_TYPE != PGM_TYPE_PAE */
3395 {
3396# endif /* PGM_GST_TYPE != PGM_TYPE_PAE */
3397 for (unsigned iPD = 0; iPD < RT_ELEMENTS(pPDSrc->a); iPD++)
3398 {
3399# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3400 if ((iPD & 255) == 0) /* Start of new PD. */
3401 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)iPD << GST_PD_SHIFT);
3402# endif
3403# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3404 Assert(pgmShwGet32BitPDEPtr(&pVM->pgm.s, (uint32_t)iPD << SHW_PD_SHIFT) == pPDEDst);
3405# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3406# if defined(VBOX_STRICT) && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) /* Unfortunately not reliable with PGMR0DynMap and multiple VMs. */
3407 RTGCPTR GCPtrStrict = (uint32_t)iPD << GST_PD_SHIFT;
3408# if PGM_GST_TYPE == PGM_TYPE_PAE
3409 GCPtrStrict |= iPdpt << X86_PDPT_SHIFT;
3410# endif
3411 AssertMsg(pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrStrict) == pPDEDst, ("%p vs %p (%RGv)\n", pgmShwGetPaePDEPtr(&pVM->pgm.s, GCPtrStrict), pPDEDst, GCPtrStrict));
3412# endif /* VBOX_STRICT */
3413# endif
3414 GSTPDE PdeSrc = pPDSrc->a[iPD];
3415 if ( PdeSrc.n.u1Present
3416 && (PdeSrc.n.u1User || fRawR0Enabled))
3417 {
3418# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3419 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3420 && !defined(PGM_WITHOUT_MAPPINGS)
3421
3422 /*
3423 * Check for conflicts with GC mappings.
3424 */
3425# if PGM_GST_TYPE == PGM_TYPE_PAE
3426 if (iPD + iPdpt * X86_PG_PAE_ENTRIES == iPdNoMapping)
3427# else
3428 if (iPD == iPdNoMapping)
3429# endif
3430 {
3431 if (pVM->pgm.s.fMappingsFixed)
3432 {
3433 /* It's fixed, just skip the mapping. */
3434 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3435 Assert(PGM_GST_TYPE == PGM_TYPE_32BIT || (iPD + cPTs - 1) / X86_PG_PAE_ENTRIES == iPD / X86_PG_PAE_ENTRIES);
3436 iPD += cPTs - 1;
3437# if PGM_SHW_TYPE != PGM_GST_TYPE /* SHW==PAE && GST==32BIT */
3438 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)(iPD + 1) << GST_PD_SHIFT);
3439# else
3440 pPDEDst += cPTs;
3441# endif
3442 pMapping = pMapping->CTX_SUFF(pNext);
3443 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3444 continue;
3445 }
3446# ifdef IN_RING3
3447# if PGM_GST_TYPE == PGM_TYPE_32BIT
3448 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3449# elif PGM_GST_TYPE == PGM_TYPE_PAE
3450 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpt << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3451# endif
3452 if (RT_FAILURE(rc))
3453 return rc;
3454
3455 /*
3456 * Update iPdNoMapping and pMapping.
3457 */
3458 pMapping = pVM->pgm.s.pMappingsR3;
3459 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3460 pMapping = pMapping->pNextR3;
3461 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3462# else /* !IN_RING3 */
3463 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3464 return VINF_PGM_SYNC_CR3;
3465# endif /* !IN_RING3 */
3466 }
3467# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3468 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3469# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3470
3471# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3472 /* advance */
3473# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3474 pPDEDst += 2;
3475# else
3476 pPDEDst++;
3477# endif
3478# else
3479 /*
3480 * Sync page directory entry.
3481 *
3482 * The current approach is to allocate the page table but to set
3483 * the entry to not-present and postpone the page table syncing till
3484 * it's actually used.
3485 */
3486# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3487 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3488# elif PGM_GST_TYPE == PGM_TYPE_PAE
3489 const unsigned iPdShw = iPD + iPdpt * X86_PG_PAE_ENTRIES; NOREF(iPdShw);
3490# else
3491 const unsigned iPdShw = iPD; NOREF(iPdShw);
3492# endif
3493 {
3494 SHWPDE PdeDst = *pPDEDst;
3495 if (PdeDst.n.u1Present)
3496 {
3497 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
3498 RTGCPHYS GCPhys;
3499 if ( !PdeSrc.b.u1Size
3500 || !fBigPagesSupported)
3501 {
3502 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
3503# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3504 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3505 GCPhys |= i * (PAGE_SIZE / 2);
3506# endif
3507 }
3508 else
3509 {
3510 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3511# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3512 /* Select the right PDE as we're emulating a 4MB page with two 2MB shadow PDEs. */
3513 GCPhys |= i * X86_PAGE_2M_SIZE;
3514# endif
3515 }
3516
3517 if ( pShwPage->GCPhys == GCPhys
3518 && pShwPage->enmKind == PGM_BTH_NAME(CalcPageKind)(&PdeSrc, cr4)
3519 && ( pShwPage->fCached
3520 || ( !fGlobal
3521 && ( false
3522# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
3523 || ( (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3524 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE)) /* global 2/4MB page. */
3525 || ( !pShwPage->fSeenNonGlobal
3526 && (cr4 & X86_CR4_PGE))
3527# endif
3528 )
3529 )
3530 )
3531 && ( (PdeSrc.u & (X86_PDE_US | X86_PDE_RW)) == (PdeDst.u & (X86_PDE_US | X86_PDE_RW))
3532 || ( fBigPagesSupported
3533 && ((PdeSrc.u & (X86_PDE_US | X86_PDE4M_PS | X86_PDE4M_D)) | PGM_PDFLAGS_TRACK_DIRTY)
3534 == ((PdeDst.u & (X86_PDE_US | X86_PDE_RW | PGM_PDFLAGS_TRACK_DIRTY)) | X86_PDE4M_PS))
3535 )
3536 )
3537 {
3538# ifdef VBOX_WITH_STATISTICS
3539 if ( !fGlobal
3540 && (PdeSrc.u & (X86_PDE4M_PS | X86_PDE4M_G)) == (X86_PDE4M_PS | X86_PDE4M_G)
3541 && (cr4 & (X86_CR4_PGE | X86_CR4_PSE)) == (X86_CR4_PGE | X86_CR4_PSE))
3542 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstSkippedGlobalPD));
3543 else if (!fGlobal && !pShwPage->fSeenNonGlobal && (cr4 & X86_CR4_PGE))
3544 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstSkippedGlobalPT));
3545 else
3546 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstCacheHit));
3547# endif /* VBOX_WITH_STATISTICS */
3548 /** @todo a replacement strategy isn't really needed unless we're using a very small pool < 512 pages.
3549 * The whole ageing stuff should be put in yet another set of #ifdefs. For now, let's just skip it. */
3550 //# ifdef PGMPOOL_WITH_CACHE
3551 // pgmPoolCacheUsed(pPool, pShwPage);
3552 //# endif
3553 }
3554 else
3555 {
3556 pgmPoolFreeByPage(pPool, pShwPage, SHW_POOL_ROOT_IDX, iPdShw);
3557 pPDEDst->u = 0;
3558 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstFreed));
3559 }
3560 }
3561 else
3562 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstNotPresent));
3563
3564 /* advance */
3565 pPDEDst++;
3566 } /* foreach 2MB PAE PDE in 4MB guest PDE */
3567# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
3568 }
3569# if PGM_GST_TYPE == PGM_TYPE_PAE
3570 else if (iPD + iPdpt * X86_PG_PAE_ENTRIES != iPdNoMapping)
3571# else
3572 else if (iPD != iPdNoMapping)
3573# endif
3574 {
3575 /*
3576 * Check if there is any page directory to mark not present here.
3577 */
3578# ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
3579 /* advance */
3580# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3581 pPDEDst += 2;
3582# else
3583 pPDEDst++;
3584# endif
3585# else
3586# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3587 for (unsigned i = 0, iPdShw = iPD * 2; i < 2; i++, iPdShw++) /* pray that the compiler unrolls this */
3588# elif PGM_GST_TYPE == PGM_TYPE_PAE
3589 const unsigned iPdShw = iPD + iPdpt * X86_PG_PAE_ENTRIES;
3590# else
3591 const unsigned iPdShw = iPD;
3592# endif
3593 {
3594 if (pPDEDst->n.u1Present)
3595 {
3596 pgmPoolFree(pVM, pPDEDst->u & SHW_PDE_PG_MASK, SHW_POOL_ROOT_IDX, iPdShw);
3597 pPDEDst->u = 0;
3598 MY_STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3DstFreedSrcNP));
3599 }
3600 pPDEDst++;
3601 }
3602# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
3603 }
3604 else
3605 {
3606# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3607 || PGM_GST_TYPE == PGM_TYPE_PAE) \
3608 && !defined(PGM_WITHOUT_MAPPINGS)
3609
3610 const unsigned cPTs = pMapping->cb >> GST_PD_SHIFT;
3611
3612 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3613# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
3614 if (pVM->pgm.s.fMappingsFixed)
3615 {
3616 /* It's fixed, just skip the mapping. */
3617 pMapping = pMapping->CTX_SUFF(pNext);
3618 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3619 }
3620 else
3621# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
3622 {
3623 /*
3624 * Check for conflicts for subsequent pagetables
3625 * and advance to the next mapping.
3626 */
3627 iPdNoMapping = ~0U;
3628 unsigned iPT = cPTs;
3629 while (iPT-- > 1)
3630 {
3631 if ( pPDSrc->a[iPD + iPT].n.u1Present
3632 && (pPDSrc->a[iPD + iPT].n.u1User || fRawR0Enabled))
3633 {
3634# ifdef IN_RING3
3635# if PGM_GST_TYPE == PGM_TYPE_32BIT
3636 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, iPD << GST_PD_SHIFT);
3637# elif PGM_GST_TYPE == PGM_TYPE_PAE
3638 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, (iPdpt << GST_PDPT_SHIFT) + (iPD << GST_PD_SHIFT));
3639# endif
3640 if (RT_FAILURE(rc))
3641 return rc;
3642
3643 /*
3644 * Update iPdNoMapping and pMapping.
3645 */
3646 pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
3647 while (pMapping && pMapping->GCPtr < (iPD << GST_PD_SHIFT))
3648 pMapping = pMapping->CTX_SUFF(pNext);
3649 iPdNoMapping = pMapping ? pMapping->GCPtr >> GST_PD_SHIFT : ~0U;
3650 break;
3651# else
3652 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3653 return VINF_PGM_SYNC_CR3;
3654# endif
3655 }
3656 }
3657 if (iPdNoMapping == ~0U && pMapping)
3658 {
3659 pMapping = pMapping->CTX_SUFF(pNext);
3660 if (pMapping)
3661 iPdNoMapping = pMapping->GCPtr >> GST_PD_SHIFT;
3662 }
3663 }
3664
3665 /* advance. */
3666 Assert(PGM_GST_TYPE == PGM_TYPE_32BIT || (iPD + cPTs - 1) / X86_PG_PAE_ENTRIES == iPD / X86_PG_PAE_ENTRIES);
3667 iPD += cPTs - 1;
3668# if PGM_SHW_TYPE != PGM_GST_TYPE /* SHW==PAE && GST==32BIT */
3669 pPDEDst = pgmShwGetPaePDEPtr(&pVM->pgm.s, (uint32_t)(iPD + 1) << GST_PD_SHIFT);
3670# else
3671 pPDEDst += cPTs;
3672# endif
3673# if PGM_GST_TYPE != PGM_SHW_TYPE
3674 AssertCompile(PGM_GST_TYPE == PGM_TYPE_32BIT && PGM_SHW_TYPE == PGM_TYPE_PAE);
3675# endif
3676# else /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3677 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3678# endif /* (PGM_GST_TYPE != PGM_TYPE_32BIT && PGM_GST_TYPE != PGM_TYPE_PAE) || PGM_WITHOUT_MAPPINGS */
3679 }
3680
3681 } /* for iPD */
3682 } /* for each PDPTE (PAE) */
3683 return VINF_SUCCESS;
3684
3685# else /* guest real and protected mode */
3686 return VINF_SUCCESS;
3687# endif
3688#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3689}
3690
3691
3692
3693
3694#ifdef VBOX_STRICT
3695#ifdef IN_RC
3696# undef AssertMsgFailed
3697# define AssertMsgFailed Log
3698#endif
3699#ifdef IN_RING3
3700# include <VBox/dbgf.h>
3701
3702/**
3703 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3704 *
3705 * @returns VBox status code (VINF_SUCCESS).
3706 * @param pVM The VM handle.
3707 * @param cr3 The root of the hierarchy.
3708 * @param cr4 The cr4; only PAE and PSE are currently used.
3709 * @param fLongMode Set if long mode, false if not long mode.
3710 * @param cMaxDepth Number of levels to dump.
3711 * @param pHlp Pointer to the output functions.
3712 */
3713__BEGIN_DECLS
3714VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3715__END_DECLS
3716
3717#endif
3718
3719/**
3720 * Checks that the shadow page table is in sync with the guest one.
3721 *
3722 * @returns The number of errors.
3723 * @param pVM The virtual machine.
3724 * @param cr3 Guest context CR3 register
3725 * @param cr4 Guest context CR4 register
3726 * @param GCPtr Where to start. Defaults to 0.
3727 * @param cb How much to check. Defaults to everything.
3728 */
3729PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3730{
3731#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3732 return 0;
3733#else
3734 unsigned cErrors = 0;
3735
3736#if PGM_GST_TYPE == PGM_TYPE_PAE
3737 /** @todo currently broken; crashes below somewhere */
3738 AssertFailed();
3739#endif
3740
3741#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3742 || PGM_GST_TYPE == PGM_TYPE_PAE \
3743 || PGM_GST_TYPE == PGM_TYPE_AMD64
3744
3745# if PGM_GST_TYPE == PGM_TYPE_AMD64
3746 bool fBigPagesSupported = true;
3747# else
3748 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3749# endif
3750 PPGM pPGM = &pVM->pgm.s;
3751 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3752 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3753# ifndef IN_RING0
3754 RTHCPHYS HCPhys; /* general usage. */
3755# endif
3756 int rc;
3757
3758 /*
3759 * Check that the Guest CR3 and all its mappings are correct.
3760 */
3761 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3762 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3763 false);
3764# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3765# if PGM_GST_TYPE == PGM_TYPE_32BIT
3766 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3767# else
3768 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3769# endif
3770 AssertRCReturn(rc, 1);
3771 HCPhys = NIL_RTHCPHYS;
3772 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3773 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3774# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3775 RTGCPHYS GCPhys;
3776 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3777 AssertRCReturn(rc, 1);
3778 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3779# endif
3780# endif /* !IN_RING0 */
3781
3782 /*
3783 * Get and check the Shadow CR3.
3784 */
3785# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3786 unsigned cPDEs = X86_PG_ENTRIES;
3787 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3788# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3789# if PGM_GST_TYPE == PGM_TYPE_32BIT
3790 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3791# else
3792 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3793# endif
3794 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3795# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3796 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3797 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3798# endif
3799 if (cb != ~(RTGCPTR)0)
3800 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3801
3802/** @todo call the other two PGMAssert*() functions. */
3803
3804# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3805 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3806# endif
3807
3808# if PGM_GST_TYPE == PGM_TYPE_AMD64
3809 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3810
3811 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3812 {
3813 PPGMPOOLPAGE pShwPdpt = NULL;
3814 PX86PML4E pPml4eSrc;
3815 PX86PML4E pPml4eDst;
3816 RTGCPHYS GCPhysPdptSrc;
3817
3818 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3819 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3820
3821 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3822 if (!pPml4eDst->n.u1Present)
3823 {
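 /* Shadow PML4E not present: skip the 512 GB of address space it covers. */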
3824 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3825 continue;
3826 }
3827
3828 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3829 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3830
3831 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3832 {
3833 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3834 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3835 cErrors++;
3836 continue;
3837 }
3838
3839 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3840 {
3841 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3842 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3843 cErrors++;
3844 continue;
3845 }
3846
3847 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3848 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3849 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3850 {
3851 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3852 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3853 cErrors++;
3854 continue;
3855 }
3856# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3857 {
3858# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3859
3860# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3861 /*
3862 * Check the PDPTEs too.
3863 */
3864 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3865
3866 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3867 {
3868 unsigned iPDSrc;
3869 PPGMPOOLPAGE pShwPde = NULL;
3870 PX86PDPE pPdpeDst;
3871 RTGCPHYS GCPhysPdeSrc;
3872# if PGM_GST_TYPE == PGM_TYPE_PAE
3873 X86PDPE PdpeSrc;
3874 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3875 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3876# else
3877 PX86PML4E pPml4eSrc;
3878 X86PDPE PdpeSrc;
3879 PX86PDPT pPdptDst;
3880 PX86PDPAE pPDDst;
3881 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3882
3883 rc = pgmShwGetLongModePDPtr(pVM, GCPtr, NULL, &pPdptDst, &pPDDst);
3884 if (rc != VINF_SUCCESS)
3885 {
3886 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3887 GCPtr += 512 * _2M;
3888 continue; /* next PDPTE */
3889 }
3890 Assert(pPDDst);
3891# endif
3892 Assert(iPDSrc == 0);
3893
3894 pPdpeDst = &pPdptDst->a[iPdpt];
3895
3896 if (!pPdpeDst->n.u1Present)
3897 {
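 /* Shadow PDPTE not present: skip the 1 GB of address space it covers. */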
3898 GCPtr += 512 * _2M;
3899 continue; /* next PDPTE */
3900 }
3901
3902 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3903 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3904
3905 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3906 {
3907 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3908 GCPtr += 512 * _2M;
3909 cErrors++;
3910 continue;
3911 }
3912
3913 if (GCPhysPdeSrc != pShwPde->GCPhys)
3914 {
3915# if PGM_GST_TYPE == PGM_TYPE_AMD64
3916 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3917# else
3918 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3919# endif
3920 GCPtr += 512 * _2M;
3921 cErrors++;
3922 continue;
3923 }
3924
3925# if PGM_GST_TYPE == PGM_TYPE_AMD64
3926 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3927 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3928 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3929 {
3930 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3931 GCPtr += 512 * _2M;
3932 cErrors++;
3933 continue;
3934 }
3935# endif
3936
3937# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3938 {
3939# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3940# if PGM_GST_TYPE == PGM_TYPE_32BIT
3941 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3942# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3943 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
3944# endif
3945# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3946 /*
3947 * Iterate the shadow page directory.
3948 */
3949 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3950 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3951
3952 for (;
3953 iPDDst < cPDEs;
3954 iPDDst++, GCPtr += cIncrement)
3955 {
3956# if PGM_SHW_TYPE == PGM_TYPE_PAE
3957 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3958# else
3959 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3960# endif
3961 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3962 {
3963 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3964 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3965 {
3966 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3967 cErrors++;
3968 continue;
3969 }
3970 }
3971 else if ( (PdeDst.u & X86_PDE_P)
3972 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3973 )
3974 {
3975 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3976 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3977 if (!pPoolPage)
3978 {
3979 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3980 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3981 cErrors++;
3982 continue;
3983 }
3984 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3985
3986 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3987 {
3988 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3989 GCPtr, (uint64_t)PdeDst.u));
3990 cErrors++;
3991 }
3992
3993 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3994 {
3995 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3996 GCPtr, (uint64_t)PdeDst.u));
3997 cErrors++;
3998 }
3999
4000 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4001 if (!PdeSrc.n.u1Present)
4002 {
4003 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4004 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4005 cErrors++;
4006 continue;
4007 }
4008
4009 if ( !PdeSrc.b.u1Size
4010 || !fBigPagesSupported)
4011 {
4012 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4013# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4014 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4015# endif
4016 }
4017 else
4018 {
4019# if PGM_GST_TYPE == PGM_TYPE_32BIT
4020 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4021 {
4022 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4023 GCPtr, (uint64_t)PdeSrc.u));
4024 cErrors++;
4025 continue;
4026 }
4027# endif
4028 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
4029# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4030 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4031# endif
4032 }
4033
4034 if ( pPoolPage->enmKind
4035 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4036 {
4037 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4038 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4039 cErrors++;
4040 }
4041
4042 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4043 if (!pPhysPage)
4044 {
4045 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4046 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4047 cErrors++;
4048 continue;
4049 }
4050
4051 if (GCPhysGst != pPoolPage->GCPhys)
4052 {
4053 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4054 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4055 cErrors++;
4056 continue;
4057 }
4058
4059 if ( !PdeSrc.b.u1Size
4060 || !fBigPagesSupported)
4061 {
4062 /*
4063 * Page Table.
4064 */
4065 const GSTPT *pPTSrc;
4066 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4067 if (RT_FAILURE(rc))
4068 {
4069 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4070 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4071 cErrors++;
4072 continue;
4073 }
4074 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4075 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4076 {
4077 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4078 // (This problem will go away when/if we shadow multiple CR3s.)
4079 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4080 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4081 cErrors++;
4082 continue;
4083 }
4084 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4085 {
4086 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4087 GCPtr, (uint64_t)PdeDst.u));
4088 cErrors++;
4089 continue;
4090 }
4091
4092 /* iterate the page table. */
4093# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4094 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4095 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4096# else
4097 const unsigned offPTSrc = 0;
4098# endif
4099 for (unsigned iPT = 0, off = 0;
4100 iPT < RT_ELEMENTS(pPTDst->a);
4101 iPT++, off += PAGE_SIZE)
4102 {
4103 const SHWPTE PteDst = pPTDst->a[iPT];
4104
4105 /* skip not-present entries. */
4106 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4107 continue;
4108 Assert(PteDst.n.u1Present);
4109
4110 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4111 if (!PteSrc.n.u1Present)
4112 {
4113# ifdef IN_RING3
4114 PGMAssertHandlerAndFlagsInSync(pVM);
4115 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4116# endif
4117 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4118 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4119 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4120 cErrors++;
4121 continue;
4122 }
4123
4124 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4125# if 1 /** @todo sync accessed bit properly... */
4126 fIgnoreFlags |= X86_PTE_A;
4127# endif
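 /* fIgnoreFlags collects the PTE bits that may legitimately differ between the guest
 * and shadow entries; the flags comparison at the end of this loop masks them out. */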
4128
4129 /* match the physical addresses */
4130 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4131 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4132
4133# ifdef IN_RING3
4134 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4135 if (RT_FAILURE(rc))
4136 {
4137 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4138 {
4139 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4140 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4141 cErrors++;
4142 continue;
4143 }
4144 }
4145 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4146 {
4147 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4148 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4149 cErrors++;
4150 continue;
4151 }
4152# endif
4153
4154 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4155 if (!pPhysPage)
4156 {
4157# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4158 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4159 {
4160 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4161 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4162 cErrors++;
4163 continue;
4164 }
4165# endif
4166 if (PteDst.n.u1Write)
4167 {
4168 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4169 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4170 cErrors++;
4171 }
4172 fIgnoreFlags |= X86_PTE_RW;
4173 }
4174 else if (HCPhysShw != (PGM_PAGE_GET_HCPHYS(pPhysPage) & SHW_PTE_PG_MASK))
4175 {
4176 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4177 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4178 cErrors++;
4179 continue;
4180 }
4181
4182 /* flags */
4183 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4184 {
4185 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4186 {
4187 if (PteDst.n.u1Write)
4188 {
4189 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! HCPhys=%RHp PteSrc=%#RX64 PteDst=%#RX64\n",
4190 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4191 cErrors++;
4192 continue;
4193 }
4194 fIgnoreFlags |= X86_PTE_RW;
4195 }
4196 else
4197 {
4198 if (PteDst.n.u1Present)
4199 {
4200 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! HCPhys=%RHp PteSrc=%#RX64 PteDst=%#RX64\n",
4201 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4202 cErrors++;
4203 continue;
4204 }
4205 fIgnoreFlags |= X86_PTE_P;
4206 }
4207 }
4208 else
4209 {
4210 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4211 {
4212 if (PteDst.n.u1Write)
4213 {
4214 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4215 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4216 cErrors++;
4217 continue;
4218 }
4219 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4220 {
4221 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4222 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4223 cErrors++;
4224 continue;
4225 }
4226 if (PteDst.n.u1Dirty)
4227 {
4228 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4229 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4230 cErrors++;
4231 }
4232# if 0 /** @todo sync access bit properly... */
4233 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4234 {
4235 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4236 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4237 cErrors++;
4238 }
4239 fIgnoreFlags |= X86_PTE_RW;
4240# else
4241 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4242# endif
4243 }
4244 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4245 {
4246 /* access bit emulation (not implemented). */
4247 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4248 {
4249 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4250 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4251 cErrors++;
4252 continue;
4253 }
4254 if (!PteDst.n.u1Accessed)
4255 {
4256 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4257 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4258 cErrors++;
4259 }
4260 fIgnoreFlags |= X86_PTE_P;
4261 }
4262# ifdef DEBUG_sandervl
4263 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4264# endif
4265 }
4266
4267 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4268 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4269 )
4270 {
4271 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4272 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4273 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4274 cErrors++;
4275 continue;
4276 }
4277 } /* foreach PTE */
4278 }
4279 else
4280 {
4281 /*
4282 * Big Page.
4283 */
4284 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
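 /* For 2/4 MB guest pages the guest PDE is compared against the shadow PDE and
 * against each shadow PTE below, masking out the bits collected in fIgnoreFlags. */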
4285 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4286 {
4287 if (PdeDst.n.u1Write)
4288 {
4289 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4290 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4291 cErrors++;
4292 continue;
4293 }
4294 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4295 {
4296 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4297 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4298 cErrors++;
4299 continue;
4300 }
4301# if 0 /** @todo sync access bit properly... */
4302 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4303 {
4304 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4305 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4306 cErrors++;
4307 }
4308 fIgnoreFlags |= X86_PTE_RW;
4309# else
4310 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4311# endif
4312 }
4313 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4314 {
4315 /* access bit emulation (not implemented). */
4316 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4317 {
4318 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4319 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4320 cErrors++;
4321 continue;
4322 }
4323 if (!PdeDst.n.u1Accessed)
4324 {
4325 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4326 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4327 cErrors++;
4328 }
4329 fIgnoreFlags |= X86_PTE_P;
4330 }
4331
4332 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4333 {
4334 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4335 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4336 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4337 cErrors++;
4338 }
4339
4340 /* iterate the page table. */
4341 for (unsigned iPT = 0, off = 0;
4342 iPT < RT_ELEMENTS(pPTDst->a);
4343 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4344 {
4345 const SHWPTE PteDst = pPTDst->a[iPT];
4346
4347 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4348 {
4349 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4350 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4351 cErrors++;
4352 }
4353
4354 /* skip not-present entries. */
4355 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4356 continue;
4357
4358 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4359
4360 /* match the physical addresses */
4361 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4362
4363# ifdef IN_RING3
4364 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4365 if (RT_FAILURE(rc))
4366 {
4367 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4368 {
4369 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4370 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4371 cErrors++;
4372 }
4373 }
4374 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4375 {
4376 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4377 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4378 cErrors++;
4379 continue;
4380 }
4381# endif
4382 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
4383 if (!pPhysPage)
4384 {
4385# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4386 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
4387 {
4388 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4389 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4390 cErrors++;
4391 continue;
4392 }
4393# endif
4394 if (PteDst.n.u1Write)
4395 {
4396 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4397 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4398 cErrors++;
4399 }
4400 fIgnoreFlags |= X86_PTE_RW;
4401 }
4402 else if (HCPhysShw != (pPhysPage->HCPhys & X86_PTE_PAE_PG_MASK))
4403 {
4404 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4405 GCPtr + off, HCPhysShw, pPhysPage->HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4406 cErrors++;
4407 continue;
4408 }
4409
4410 /* flags */
4411 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4412 {
4413 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4414 {
4415 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4416 {
4417 if (PteDst.n.u1Write)
4418 {
4419 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! HCPhys=%RHp PdeSrc=%#RX64 PteDst=%#RX64\n",
4420 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4421 cErrors++;
4422 continue;
4423 }
4424 fIgnoreFlags |= X86_PTE_RW;
4425 }
4426 }
4427 else
4428 {
4429 if (PteDst.n.u1Present)
4430 {
4431 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! HCPhys=%RHp PdeSrc=%#RX64 PteDst=%#RX64\n",
4432 GCPtr + off, pPhysPage->HCPhys, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4433 cErrors++;
4434 continue;
4435 }
4436 fIgnoreFlags |= X86_PTE_P;
4437 }
4438 }
4439
4440 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4441 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4442 )
4443 {
4444 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4445 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4446 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4447 cErrors++;
4448 continue;
4449 }
4450 } /* for each PTE */
4451 }
4452 }
4453 /* not present */
4454
4455 } /* for each PDE */
4456
4457 } /* for each PDPTE */
4458
4459 } /* for each PML4E */
4460
4461# ifdef DEBUG
4462 if (cErrors)
4463 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4464# endif
4465
4466#endif /* GST == 32BIT, PAE or AMD64 */
4467 return cErrors;
4468
4469#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4470}
4471#endif /* VBOX_STRICT */
4472
4473
4474/**
4475 * Sets up the CR3 for shadow paging
4476 *
4477 * @returns Strict VBox status code.
4478 * @retval VINF_SUCCESS.
4479 *
4480 * @param pVM VM handle.
4481 * @param GCPhysCR3 The physical address in the CR3 register.
4482 */
4483PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3)
4484{
4485 /* Update guest paging info. */
4486#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4487 || PGM_GST_TYPE == PGM_TYPE_PAE \
4488 || PGM_GST_TYPE == PGM_TYPE_AMD64
4489
4490 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4491
4492 /*
4493 * Map the page CR3 points at.
4494 */
4495 RTHCPHYS HCPhysGuestCR3;
4496 RTHCPTR HCPtrGuestCR3;
4497 int rc = pgmRamGCPhys2HCPtrAndHCPhysWithFlags(&pVM->pgm.s, GCPhysCR3 & GST_CR3_PAGE_MASK, &HCPtrGuestCR3, &HCPhysGuestCR3);
4498 if (RT_SUCCESS(rc))
4499 {
4500 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4501 if (RT_SUCCESS(rc))
4502 {
4503 PGM_INVL_PG(pVM->pgm.s.GCPtrCR3Mapping);
4504# if PGM_GST_TYPE == PGM_TYPE_32BIT
4505 pVM->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4506# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4507 pVM->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4508# endif
4509 pVM->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4510
4511# elif PGM_GST_TYPE == PGM_TYPE_PAE
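 /* The PAE PDPT is only required to be 32-byte aligned (CR3 bits 5..31 hold its
 * address), so it may sit at an offset within the mapped page; that offset is
 * reused for the RC mapping below. */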
4512 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4513 pVM->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4514# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4515 pVM->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4516# endif
4517 pVM->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4518 Log(("Cached mapping %RGv\n", pVM->pgm.s.pGstPaePdptRC));
4519
4520 /*
4521 * Map the 4 PDs too.
4522 */
4523 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVM->pgm.s);
4524 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4525 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4526 {
4527 if (pGuestPDPT->a[i].n.u1Present)
4528 {
4529 RTHCPTR HCPtr;
4530 RTHCPHYS HCPhys;
4531 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4532 int rc2 = pgmRamGCPhys2HCPtrAndHCPhysWithFlags(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
4533 if (RT_SUCCESS(rc2))
4534 {
4535 rc = PGMMap(pVM, GCPtr, HCPhys & X86_PTE_PAE_PG_MASK, PAGE_SIZE, 0);
4536 AssertRCReturn(rc, rc);
4537
4538 pVM->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4539# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4540 pVM->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4541# endif
4542 pVM->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4543 pVM->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4544 PGM_INVL_PG(GCPtr); /** @todo This ends up calling HWACCMInvalidatePage, is that correct? */
4545 continue;
4546 }
4547 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
                }

                pVM->pgm.s.apGstPaePDsR3[i] = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
                pVM->pgm.s.apGstPaePDsR0[i] = 0;
# endif
                pVM->pgm.s.apGstPaePDsRC[i] = 0;
                pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
                PGM_INVL_PG(GCPtr); /** @todo this shouldn't be necessary? */
            }

# elif PGM_GST_TYPE == PGM_TYPE_AMD64
            pVM->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
            pVM->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
# endif
# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
            if (!HWACCMIsNestedPagingActive(pVM))
            {
                /*
                 * Update the shadow root page as well since that's not fixed.
                 */
                /** @todo Move this into PGMAllBth.h. */
                PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
                if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
                {
                    /* It might have been freed already by a pool flush (see e.g. PGMR3MappingsUnfix). */
                    /** @todo Coordinate this better with the pool. */
                    if (pVM->pgm.s.CTX_SUFF(pShwPageCR3)->enmKind != PGMPOOLKIND_FREE)
                        pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), PGMPOOL_IDX_AMD64_CR3, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->GCPhys >> PAGE_SHIFT);
                    pVM->pgm.s.pShwPageCR3R3 = 0;
                    pVM->pgm.s.pShwPageCR3R0 = 0;
                    pVM->pgm.s.pShwRootR3 = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
                    pVM->pgm.s.pShwRootR0 = 0;
# endif
                    pVM->pgm.s.HCPhysShwCR3 = 0;
                }

                Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
                rc = pgmPoolAlloc(pVM, GCPhysCR3, PGMPOOLKIND_64BIT_PML4, PGMPOOL_IDX_AMD64_CR3, GCPhysCR3 >> PAGE_SHIFT, &pVM->pgm.s.CTX_SUFF(pShwPageCR3));
                if (rc == VERR_PGM_POOL_FLUSHED)
                {
                    Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
                    Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
                    return VINF_PGM_SYNC_CR3;
                }
                AssertRCReturn(rc, rc);
# ifdef IN_RING0
                pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# else
                pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# endif
                pVM->pgm.s.pShwRootR3 = (R3PTRTYPE(void *))pVM->pgm.s.CTX_SUFF(pShwPageCR3)->pvPageR3;
                Assert(pVM->pgm.s.pShwRootR3);
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
                pVM->pgm.s.pShwRootR0 = (R0PTRTYPE(void *))PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# endif
                pVM->pgm.s.HCPhysShwCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3)->Core.Key;
                rc = VINF_SUCCESS; /* clear it - pgmPoolAlloc returns hints. */
            }
# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */
# endif
        }
        else
            AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
    }
    else
        AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));

#else /* prot/real stub */
    int rc = VINF_SUCCESS;
#endif

#ifdef VBOX_WITH_PGMPOOL_PAGING_ONLY
    /* Update shadow paging info for guest modes with paging (32, pae, 64). */
# if (   (   PGM_SHW_TYPE == PGM_TYPE_32BIT \
          || PGM_SHW_TYPE == PGM_TYPE_PAE \
          || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
      && (   PGM_GST_TYPE != PGM_TYPE_REAL \
          && PGM_GST_TYPE != PGM_TYPE_PROT))

    Assert(!HWACCMIsNestedPagingActive(pVM));

    /*
     * Update the shadow root page as well since that's not fixed.
     */
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pOldShwPageCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
    uint32_t iOldShwUserTable = pVM->pgm.s.iShwUserTable;
    uint32_t iOldShwUser = pVM->pgm.s.iShwUser;
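    /* Note: the old root page and the pool user/table it was registered with are
       remembered here so it can be freed below, once the new root is in place. */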

    Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
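    /* Allocate the new shadow root from the pool. If the pool had to be flushed to
       make room, VM_FF_PGM_SYNC_CR3 is set and the caller must do a full CR3 sync. */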
    rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pVM->pgm.s.CTX_SUFF(pShwPageCR3));
    if (rc == VERR_PGM_POOL_FLUSHED)
    {
        Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
        Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
        return VINF_PGM_SYNC_CR3;
    }
    AssertRCReturn(rc, rc);
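    /* Remember which pool user/table the new root was registered with; UnmapCR3 (and
       the next MapCR3) pass these back to pgmPoolFreeByPage when releasing it. */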
    pVM->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
    pVM->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
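    /* pgmPoolAlloc filled in the current context's pShwPageCR3 pointer; derive the
       pointers for the other contexts from it. */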
# ifdef IN_RING0
    pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
    pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# elif defined(IN_RC)
    pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
    pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# else
    pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
    pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# endif
    pVM->pgm.s.pShwRootR3 = (R3PTRTYPE(void *))pVM->pgm.s.CTX_SUFF(pShwPageCR3)->pvPageR3;
    Assert(pVM->pgm.s.pShwRootR3);
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
    pVM->pgm.s.pShwRootR0 = (R0PTRTYPE(void *))PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pVM->pgm.s.CTX_SUFF(pShwPageCR3));
# endif
    pVM->pgm.s.HCPhysShwCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3)->Core.Key;
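    /* Core.Key of a pool page is the page's host physical address, so HCPhysShwCR3 now
       holds the physical address of the new shadow root page. */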

# ifndef PGM_WITHOUT_MAPPINGS
    /* Apply all hypervisor mappings to the new CR3. */
    rc = PGMMapActivateAll(pVM);
    AssertRCReturn(rc, rc);
# endif

    /* Set the current hypervisor CR3. */
    CPUMSetHyperCR3(pVM, PGMGetHyperCR3(pVM));

    /* Clean up the old CR3 root. */
    if (pOldShwPageCR3)
    {
# ifndef PGM_WITHOUT_MAPPINGS
        /* Remove the hypervisor mappings from the shadow page table. */
        pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
# endif
        /* It might have been freed already by a pool flush (see e.g. PGMR3MappingsUnfix). */
        /** @todo Coordinate this better with the pool. */
        if (pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE)
            pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
    }

# endif
#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY */

    return rc;
}

/**
 * Unmaps the shadow CR3.
 *
 * @returns VBox status code (no special status codes).
 * @param   pVM     The VM handle.
 */
PGM_BTH_DECL(int, UnmapCR3)(PVM pVM)
{
    LogFlow(("UnmapCR3\n"));

    int rc = VINF_SUCCESS;

    /* Update guest paging info. */
#if PGM_GST_TYPE == PGM_TYPE_32BIT
    pVM->pgm.s.pGst32BitPdR3 = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
    pVM->pgm.s.pGst32BitPdR0 = 0;
# endif
    pVM->pgm.s.pGst32BitPdRC = 0;

#elif PGM_GST_TYPE == PGM_TYPE_PAE
    pVM->pgm.s.pGstPaePdptR3 = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
    pVM->pgm.s.pGstPaePdptR0 = 0;
# endif
    pVM->pgm.s.pGstPaePdptRC = 0;
    for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
    {
        pVM->pgm.s.apGstPaePDsR3[i] = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
        pVM->pgm.s.apGstPaePDsR0[i] = 0;
# endif
        pVM->pgm.s.apGstPaePDsRC[i] = 0;
        pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
    }

#elif PGM_GST_TYPE == PGM_TYPE_AMD64
    pVM->pgm.s.pGstAmd64Pml4R3 = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
    pVM->pgm.s.pGstAmd64Pml4R0 = 0;
# endif
# ifndef VBOX_WITH_PGMPOOL_PAGING_ONLY
    if (!HWACCMIsNestedPagingActive(pVM))
    {
        pVM->pgm.s.pShwRootR3 = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
        pVM->pgm.s.pShwRootR0 = 0;
# endif
        pVM->pgm.s.HCPhysShwCR3 = 0;
        if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
        {
            PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
            pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), PGMPOOL_IDX_AMD64_CR3, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->GCPhys >> PAGE_SHIFT);
            pVM->pgm.s.pShwPageCR3R3 = 0;
            pVM->pgm.s.pShwPageCR3R0 = 0;
        }
    }
# endif /* !VBOX_WITH_PGMPOOL_PAGING_ONLY */

#else /* prot/real mode stub */
    /* nothing to do */
#endif

#if defined(VBOX_WITH_PGMPOOL_PAGING_ONLY) && !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
    /* Update shadow paging info. */
# if (   (   PGM_SHW_TYPE == PGM_TYPE_32BIT \
          || PGM_SHW_TYPE == PGM_TYPE_PAE \
          || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
      && (   PGM_GST_TYPE != PGM_TYPE_REAL \
          && PGM_GST_TYPE != PGM_TYPE_PROT))

    Assert(!HWACCMIsNestedPagingActive(pVM));

# ifndef PGM_WITHOUT_MAPPINGS
    /* Remove the hypervisor mappings from the shadow page table. */
    PGMMapDeactivateAll(pVM);
# endif

    pVM->pgm.s.pShwRootR3 = 0;
# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
    pVM->pgm.s.pShwRootR0 = 0;
# endif
    pVM->pgm.s.HCPhysShwCR3 = 0;
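    /* Release the shadow CR3 root page back to the pool, using the user/table recorded
       by MapCR3, and reset the bookkeeping. */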
    if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
    {
        PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
        pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), pVM->pgm.s.iShwUser, pVM->pgm.s.iShwUserTable);
        pVM->pgm.s.pShwPageCR3R3 = 0;
        pVM->pgm.s.pShwPageCR3R0 = 0;
        pVM->pgm.s.iShwUser = 0;
        pVM->pgm.s.iShwUserTable = 0;
    }
# endif
#endif /* VBOX_WITH_PGMPOOL_PAGING_ONLY && !IN_RC */

    return rc;
}