source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 27488
1/* $Id: PGMAllBth.h 27488 2010-03-18 16:12:49Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
34PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
35PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
37#ifdef VBOX_STRICT
38PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
39#endif
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
42PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
43RT_C_DECLS_END
44
45
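/*
 * Note: this header is a paging-mode template. It is compiled once for every
 * supported guest (PGM_GST_TYPE) / shadow (PGM_SHW_TYPE) paging-mode pair, and
 * PGM_BTH_DECL / PGM_BTH_NAME mangle the function names per instantiation, so
 * the declarations above expand to mode-specific symbols.
 */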
46/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
47#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
48# error "Invalid combination; PAE guest implies PAE shadow"
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
53# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
57 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
58# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
59#endif
60
61#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
62 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
63# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
64#endif
65
66
67#ifndef IN_RING3
68/**
69 * #PF Handler for raw-mode guest execution.
70 *
71 * @returns VBox status code (appropriate for trap handling and GC return).
72 *
73 * @param pVCpu VMCPU Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 * @param pfLockTaken PGM lock taken here or not (out)
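 *
 * Status codes commonly returned by the code below: VINF_SUCCESS when the
 * fault was resolved here (page synced, dirty/accessed bits fixed up, or a
 * handler dealt with it), VINF_EM_RAW_GUEST_TRAP to reflect the fault to the
 * guest, VINF_EM_RAW_EMULATE_INSTR to have the instruction emulated, and
 * VINF_PGM_SYNC_CR3 when a full shadow page table sync is required.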
78 */
79PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
80{
81 PVM pVM = pVCpu->CTX_SUFF(pVM);
82
83 *pfLockTaken = false;
84
85# if defined(IN_RC) && defined(VBOX_STRICT)
86 PGMDynCheckLocks(pVM);
87# endif
88
89# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
90 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
91 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
92
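 /* The body below is only compiled for combinations where PGM actually shadows
 guest page tables (or runs EPT with an unpaged PROT guest); the remaining
 combinations fall through to the AssertReleaseMsgFailed at the end of this
 function. */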
93# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
94 /*
95 * Hide the instruction fetch trap indicator for now.
96 */
97 /** @todo NXE will change this and we must fix NXE in the switcher too! */
98 if (uErr & X86_TRAP_PF_ID)
99 {
100 uErr &= ~X86_TRAP_PF_ID;
101 TRPMSetErrorCode(pVCpu, uErr);
102 }
103# endif
104
105 /*
106 * Get PDs.
107 */
108 int rc;
109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
110# if PGM_GST_TYPE == PGM_TYPE_32BIT
111 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
113
114# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
115
116# if PGM_GST_TYPE == PGM_TYPE_PAE
117 unsigned iPDSrc = 0; /* initialized to shut up gcc */
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120
121# elif PGM_GST_TYPE == PGM_TYPE_AMD64
122 unsigned iPDSrc = 0; /* initialized to shut up gcc */
123 PX86PML4E pPml4eSrc;
124 X86PDPE PdpeSrc;
125 PGSTPD pPDSrc;
126
127 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
128 Assert(pPml4eSrc);
129# endif
130
131 /* Quick check for a valid guest trap. (PAE & AMD64) */
132 if (!pPDSrc)
133 {
134# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
135 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
136# else
137 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# endif
139 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
140 TRPMSetErrorCode(pVCpu, uErr);
141 return VINF_EM_RAW_GUEST_TRAP;
142 }
143# endif
144
145# else /* !PGM_WITH_PAGING */
146 PGSTPD pPDSrc = NULL;
147 const unsigned iPDSrc = 0;
148# endif /* !PGM_WITH_PAGING */
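 /* Without guest paging (real/protected mode guest) there is no guest page
 directory to walk; iPDSrc/pPDSrc stay 0/NULL and a fully permissive PdeSrc
 is faked further down. */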
149
150 /* First check for a genuine guest page fault. */
151# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
152 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
153 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
154 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
155 if (rc == VINF_EM_RAW_GUEST_TRAP)
156 {
157 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
158 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
159 return rc;
160 }
161# endif /* PGM_WITH_PAGING */
162
163 /* Take the big lock now. */
164 *pfLockTaken = true;
165 pgmLock(pVM);
166
167 /* Fetch the guest PDE */
168# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
169 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
170# else
171 GSTPDE PdeSrc;
172 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
173 PdeSrc.n.u1Present = 1;
174 PdeSrc.n.u1Write = 1;
175 PdeSrc.n.u1Accessed = 1;
176 PdeSrc.n.u1User = 1;
177# endif
178
179# if PGM_SHW_TYPE == PGM_TYPE_32BIT
180 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
181 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
182
183# elif PGM_SHW_TYPE == PGM_TYPE_PAE
184 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
185
186 PX86PDPAE pPDDst;
187# if PGM_GST_TYPE != PGM_TYPE_PAE
188 X86PDPE PdpeSrc;
189
190 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
191 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
192# endif
193 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
194 if (rc != VINF_SUCCESS)
195 {
196 AssertRC(rc);
197 return rc;
198 }
199 Assert(pPDDst);
200
201# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
202 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
203 PX86PDPAE pPDDst;
204# if PGM_GST_TYPE == PGM_TYPE_PROT
205 /* AMD-V nested paging */
206 X86PML4E Pml4eSrc;
207 X86PDPE PdpeSrc;
208 PX86PML4E pPml4eSrc = &Pml4eSrc;
209
210 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
211 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
212 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
213# endif
214
215 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
216 if (rc != VINF_SUCCESS)
217 {
218 AssertRC(rc);
219 return rc;
220 }
221 Assert(pPDDst);
222
223# elif PGM_SHW_TYPE == PGM_TYPE_EPT
224 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
225 PEPTPD pPDDst;
226
227 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
228 if (rc != VINF_SUCCESS)
229 {
230 AssertRC(rc);
231 return rc;
232 }
233 Assert(pPDDst);
234# endif
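 /* At this point iPDDst/pPDDst identify the shadow page-directory slot that
 covers pvFault for the active shadow mode, and PdeSrc holds the guest PDE
 (real or faked) fetched above. */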
235
236# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
237 /* Dirty page handling. */
238 if (uErr & X86_TRAP_PF_RW) /* write fault? */
239 {
240 /*
241 * If we successfully correct the write protection fault due to dirty bit
242 * tracking, then return immediately.
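 * (Shadow PTEs for guest pages whose dirty bit is still clear are kept
 * read-only and tagged PGM_PTFLAGS_TRACK_DIRTY, see SyncPageWorker below,
 * so the first guest write faults here and CheckDirtyPageFault gets a
 * chance to fix up the accessed/dirty state.)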
243 */
244 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
245 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
246 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
247 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
248 {
249 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
250 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
251 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
252 return VINF_SUCCESS;
253 }
254 }
255
256# if 0 /* rarely useful; leave for debugging. */
257 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
258# endif
259# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
260
261 /*
262 * A common case is the not-present error caused by lazy page table syncing.
263 *
264 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
265 * so we can safely assume that the shadow PT is present when calling SyncPage later.
266 *
267 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
268 * of mapping conflict and defer to SyncCR3 in R3.
269 * (Again, we do NOT support access handlers for non-present guest pages.)
270 *
271 */
272 Assert(PdeSrc.n.u1Present);
273 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
274 && !pPDDst->a[iPDDst].n.u1Present
275 )
276 {
277 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
278 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
279 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
280 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
281 if (RT_SUCCESS(rc))
282 {
283 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
284 return rc;
285 }
286 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
287 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
288 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
289 return VINF_PGM_SYNC_CR3;
290 }
291
292# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
293 /*
294 * Check if this address is within any of our mappings.
295 *
296 * This is *very* fast and it's gonna save us a bit of effort below and prevent
297 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
298 * (BTW, it's impossible to have physical access handlers in a mapping.)
299 */
300 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
301 {
302 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
303 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
304 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
305 {
306 if (pvFault < pMapping->GCPtr)
307 break;
308 if (pvFault - pMapping->GCPtr < pMapping->cb)
309 {
310 /*
311 * The first thing we check is if we've got an undetected conflict.
312 */
313 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
314 {
315 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
316 while (iPT-- > 0)
317 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
318 {
319 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
320 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
321 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
322 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
323 return VINF_PGM_SYNC_CR3;
324 }
325 }
326
327 /*
328 * Check if the fault address is in a virtual page access handler range.
329 */
330 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
331 if ( pCur
332 && pvFault - pCur->Core.Key < pCur->cb
333 && uErr & X86_TRAP_PF_RW)
334 {
335# ifdef IN_RC
336 STAM_PROFILE_START(&pCur->Stat, h);
337 pgmUnlock(pVM);
338 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
339 pgmLock(pVM);
340 STAM_PROFILE_STOP(&pCur->Stat, h);
341# else
342 AssertFailed();
343 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
344# endif
345 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
346 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
347 return rc;
348 }
349
350 /*
351 * Pretend we're not here and let the guest handle the trap.
352 */
353 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
354 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
355 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
356 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
357 return VINF_EM_RAW_GUEST_TRAP;
358 }
359 }
360 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
361 } /* pgmMapAreMappingsEnabled(&pVM->pgm.s) */
362# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
363
364 /*
365 * Check if this fault address is flagged for special treatment,
366 * which means we'll have to figure out the physical address and
367 * check flags associated with it.
368 *
369 * ASSUME that we can limit any special access handling to pages
370 * in page tables which the guest believes to be present.
371 */
372 Assert(PdeSrc.n.u1Present);
373 {
374 RTGCPHYS GCPhys = NIL_RTGCPHYS;
375
376# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
377 if ( PdeSrc.b.u1Size
378# if PGM_GST_TYPE == PGM_TYPE_32BIT
379 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
380# endif
381 )
382 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
383 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
384 else
385 {
386 PGSTPT pPTSrc;
387 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
388 if (RT_SUCCESS(rc))
389 {
390 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
391 if (pPTSrc->a[iPTESrc].n.u1Present)
392 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
393 }
394 }
395# else
396 /* No paging so the fault address is the physical address */
397 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
398# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
399
400 /*
401 * If we have a GC address we'll check if it has any flags set.
402 */
403 if (GCPhys != NIL_RTGCPHYS)
404 {
405 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
406
407 PPGMPAGE pPage;
408 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
409 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
410 {
411 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
412 {
413 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
414 {
415 /*
416 * Physical page access handler.
417 */
418 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
419 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
420 if (pCur)
421 {
422# ifdef PGM_SYNC_N_PAGES
423 /*
424 * If the region is write protected and we got a page not present fault, then sync
425 * the pages. If the fault was caused by a read, then restart the instruction.
426 * In case of write access continue to the GC write handler.
427 *
428 * ASSUMES that there is only one handler per page or that they have similar write properties.
429 */
430 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
431 && !(uErr & X86_TRAP_PF_P))
432 {
433 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
434 if ( RT_FAILURE(rc)
435 || !(uErr & X86_TRAP_PF_RW)
436 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
437 {
438 AssertRC(rc);
439 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
440 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
441 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
442 return rc;
443 }
444 }
445# endif
446
447 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
448 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
449 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
450
451# if defined(IN_RC) || defined(IN_RING0)
452 if (pCur->CTX_SUFF(pfnHandler))
453 {
454 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
455# ifdef IN_RING0
456 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
457# else
458 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
459# endif
460 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
461 void *pvUser = pCur->CTX_SUFF(pvUser);
462
463 STAM_PROFILE_START(&pCur->Stat, h);
464 if (fLeaveLock)
465 pgmUnlock(pVM); /* @todo: Not entirely safe. */
466
467 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
468 if (fLeaveLock)
469 pgmLock(pVM);
470# ifdef VBOX_WITH_STATISTICS
471 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
472 if (pCur)
473 STAM_PROFILE_STOP(&pCur->Stat, h);
474# else
475 pCur = NULL; /* might be invalid by now. */
476# endif
477
478 }
479 else
480# endif
481 rc = VINF_EM_RAW_EMULATE_INSTR;
482
483 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
484 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
485 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
486 return rc;
487 }
488 }
489# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
490 else
491 {
492# ifdef PGM_SYNC_N_PAGES
493 /*
494 * If the region is write protected and we got a page not present fault, then sync
495 * the pages. If the fault was caused by a read, then restart the instruction.
496 * In case of write access continue to the GC write handler.
497 */
498 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
499 && !(uErr & X86_TRAP_PF_P))
500 {
501 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
502 if ( RT_FAILURE(rc)
503 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
504 || !(uErr & X86_TRAP_PF_RW))
505 {
506 AssertRC(rc);
507 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
508 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
509 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
510 return rc;
511 }
512 }
513# endif
514 /*
515 * Ok, it's a virtual page access handler.
516 *
517 * Since it's faster to search by address, we'll do that first
518 * and then retry by GCPhys if that fails.
519 */
520 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
521 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
522 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
523 */
524 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
525 if (pCur)
526 {
527 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
528 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
529 || !(uErr & X86_TRAP_PF_P)
530 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
531 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
532
533 if ( pvFault - pCur->Core.Key < pCur->cb
534 && ( uErr & X86_TRAP_PF_RW
535 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
536 {
537# ifdef IN_RC
538 STAM_PROFILE_START(&pCur->Stat, h);
539 pgmUnlock(pVM);
540 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
541 pgmLock(pVM);
542 STAM_PROFILE_STOP(&pCur->Stat, h);
543# else
544 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
545# endif
546 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
547 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
548 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
549 return rc;
550 }
551 /* Unhandled part of a monitored page */
552 }
553 else
554 {
555 /* Check by physical address. */
556 unsigned iPage;
557 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
558 &pCur, &iPage);
559 Assert(RT_SUCCESS(rc) || !pCur);
560 if ( pCur
561 && ( uErr & X86_TRAP_PF_RW
562 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
563 {
564 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
565# ifdef IN_RC
566 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
567 Assert(off < pCur->cb);
568 STAM_PROFILE_START(&pCur->Stat, h);
569 pgmUnlock(pVM);
570 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
571 pgmLock(pVM);
572 STAM_PROFILE_STOP(&pCur->Stat, h);
573# else
574 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
575# endif
576 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
577 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
578 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
579 return rc;
580 }
581 }
582 }
583# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
584
585 /*
586 * There is a handled area of the page, but this fault doesn't belong to it.
587 * We must emulate the instruction.
588 *
589 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
590 * we first check if this was a page-not-present fault for a page with only
591 * write access handlers. Restart the instruction if it wasn't a write access.
592 */
593 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
594
595 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
596 && !(uErr & X86_TRAP_PF_P))
597 {
598 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
599 if ( RT_FAILURE(rc)
600 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
601 || !(uErr & X86_TRAP_PF_RW))
602 {
603 AssertRC(rc);
604 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
605 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
606 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
607 return rc;
608 }
609 }
610
611 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
612 * It's writing to an unhandled part of the LDT page several million times.
613 */
614 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
615 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
616 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
617 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
618 return rc;
619 } /* if any kind of handler */
620
621# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
622 if (uErr & X86_TRAP_PF_P)
623 {
624 /*
625 * The page isn't marked, but it might still be monitored by a virtual page access handler.
626 * (ASSUMES no temporary disabling of virtual handlers.)
627 */
628 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
629 * we should correct both the shadow page table and physical memory flags, and not only check for
630 * accesses within the handler region but for access to pages with virtual handlers. */
631 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
632 if (pCur)
633 {
634 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
635 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
636 || !(uErr & X86_TRAP_PF_P)
637 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
638 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
639
640 if ( pvFault - pCur->Core.Key < pCur->cb
641 && ( uErr & X86_TRAP_PF_RW
642 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
643 {
644# ifdef IN_RC
645 STAM_PROFILE_START(&pCur->Stat, h);
646 pgmUnlock(pVM);
647 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
648 pgmLock(pVM);
649 STAM_PROFILE_STOP(&pCur->Stat, h);
650# else
651 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
652# endif
653 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
654 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
655 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
656 return rc;
657 }
658 }
659 }
660# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
661 }
662 else
663 {
664 /*
665 * When the guest accesses invalid physical memory (e.g. probing
666 * of RAM or accessing a remapped MMIO range), then we'll fall
667 * back to the recompiler to emulate the instruction.
668 */
669 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
670 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
671 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
672 return VINF_EM_RAW_EMULATE_INSTR;
673 }
674
675 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
676
677# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
678 /*
679 * We are here only if the page is present in the guest page tables and
680 * the trap is not handled by our handlers.
681 *
682 * Check it for page out-of-sync situation.
683 */
684 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
685
686 if (!(uErr & X86_TRAP_PF_P))
687 {
688 /*
689 * Page is not present in our page tables.
690 * Try to sync it!
691 * BTW, fPageShw is invalid in this branch!
692 */
693 if (uErr & X86_TRAP_PF_US)
694 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
695 else /* supervisor */
696 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
697
698# if defined(LOG_ENABLED) && !defined(IN_RING0)
699 RTGCPHYS GCPhys2;
700 uint64_t fPageGst2;
701 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
702 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
703 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
704# endif /* LOG_ENABLED */
705
706# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
707 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
708 {
709 uint64_t fPageGst;
710 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
711 if ( RT_SUCCESS(rc)
712 && !(fPageGst & X86_PTE_US))
713 {
714 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
715 if ( pvFault == (RTGCPTR)pRegFrame->eip
716 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
717# ifdef CSAM_DETECT_NEW_CODE_PAGES
718 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
719 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
720# endif /* CSAM_DETECT_NEW_CODE_PAGES */
721 )
722 {
723 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
724 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
725 if (rc != VINF_SUCCESS)
726 {
727 /*
728 * CSAM needs to perform a job in ring 3.
729 *
730 * Sync the page before going to the host context; otherwise we'll end up in a loop if
731 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
732 */
733 LogFlow(("CSAM ring 3 job\n"));
734 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
735 AssertRC(rc2);
736
737 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
738 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
739 return rc;
740 }
741 }
742# ifdef CSAM_DETECT_NEW_CODE_PAGES
743 else if ( uErr == X86_TRAP_PF_RW
744 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
745 && pRegFrame->ecx < 0x10000)
746 {
747 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
748 * to detect loading of new code pages.
749 */
750
751 /*
752 * Decode the instruction.
753 */
754 RTGCPTR PC;
755 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
756 if (rc == VINF_SUCCESS)
757 {
758 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
759 uint32_t cbOp;
760 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
761
762 /* For now we'll restrict this to rep movsw/d instructions */
763 if ( rc == VINF_SUCCESS
764 && pDis->pCurInstr->opcode == OP_MOVSWD
765 && (pDis->prefix & PREFIX_REP))
766 {
767 CSAMMarkPossibleCodePage(pVM, pvFault);
768 }
769 }
770 }
771# endif /* CSAM_DETECT_NEW_CODE_PAGES */
772
773 /*
774 * Mark this page as safe.
775 */
776 /** @todo not correct for pages that contain both code and data!! */
777 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
778 CSAMMarkPage(pVM, pvFault, true);
779 }
780 }
781# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
782 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
783 if (RT_SUCCESS(rc))
784 {
785 /* The page was successfully synced, return to the guest. */
786 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
787 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
788 return VINF_SUCCESS;
789 }
790 }
791 else /* uErr & X86_TRAP_PF_P: */
792 {
793 /*
794 * Write protected pages are made writable when the guest makes its first
795 * write to them. This happens for pages that are shared, write monitored
796 * and not yet allocated.
797 *
798 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
799 * to physically monitored regions that are no longer valid.
800 * Assume for now it only applies to the read/write flag.
801 */
802 if ( RT_SUCCESS(rc)
803 && (uErr & X86_TRAP_PF_RW))
804 {
805 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
806 {
807 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
808 GCPhys, pPage, pvFault, uErr));
809 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
810 if (rc != VINF_SUCCESS)
811 {
812 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
813 return rc;
814 }
815 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
816 return VINF_EM_NO_MEMORY;
817 }
818
819# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
820 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
821 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
822 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
823 {
824 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
825 uint64_t fPageGst;
826 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
827 if ( RT_SUCCESS(rc)
828 && !(fPageGst & X86_PTE_RW))
829 {
830 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
831 if (RT_SUCCESS(rc))
832 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
833 else
834 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
835 return rc;
836 }
837 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
838 }
839# endif
840 /// @todo count the above case; else
841 if (uErr & X86_TRAP_PF_US)
842 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
843 else /* supervisor */
844 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
845
846 /*
847 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
848 * page is not present, which is not true in this case.
849 */
850 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
851 if (RT_SUCCESS(rc))
852 {
853 /*
854 * Page was successfully synced, return to guest.
855 * First invalidate the page as it might be in the TLB.
856 */
857# if PGM_SHW_TYPE == PGM_TYPE_EPT
858 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
859# else
860 PGM_INVL_PG(pVCpu, pvFault);
861# endif
862# ifdef VBOX_STRICT
863 RTGCPHYS GCPhys2;
864 uint64_t fPageGst;
865 if (!HWACCMIsNestedPagingActive(pVM))
866 {
867 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
868 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
869 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
870 }
871 uint64_t fPageShw;
872 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
873 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
874# endif /* VBOX_STRICT */
875 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
876 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
877 return VINF_SUCCESS;
878 }
879 }
880
881# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
882# ifdef VBOX_STRICT
883 /*
884 * Check for VMM page flags vs. Guest page flags consistency.
885 * Currently only for debug purposes.
886 */
887 if (RT_SUCCESS(rc))
888 {
889 /* Get guest page flags. */
890 uint64_t fPageGst;
891 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
892 if (RT_SUCCESS(rc))
893 {
894 uint64_t fPageShw;
895 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
896
897 /*
898 * Compare page flags.
899 * Note: we have AVL, A, D bits desynched.
900 */
901 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
902 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
903 }
904 else
905 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
906 }
907 else
908 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
909# endif /* VBOX_STRICT */
910# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
911 }
912 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
913# endif /* PGM_OUT_OF_SYNC_IN_GC */
914 }
915 else /* GCPhys == NIL_RTGCPHYS */
916 {
917 /*
918 * Page not present in Guest OS or invalid page table address.
919 * This is potential virtual page access handler food.
920 *
921 * For the present we'll say that our access handlers don't
922 * work for this case - we've already discarded the page table
923 * not present case which is identical to this.
924 *
925 * When we perchance find we need this, we will probably have AVL
926 * trees (offset based) to operate on and we can measure their speed
927 * against mapping a page table and probably rearrange this handling
928 * a bit. (Like, searching virtual ranges before checking the
929 * physical address.)
930 */
931 }
932 }
933
934# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
935 /*
936 * Conclusion, this is a guest trap.
937 */
938 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
939 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
940 return VINF_EM_RAW_GUEST_TRAP;
941# else
942 /* present, but not a monitored page; perhaps the guest is probing physical memory */
943 return VINF_EM_RAW_EMULATE_INSTR;
944# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
945
946
947# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
948
949 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
950 return VERR_INTERNAL_ERROR;
951# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
952}
953#endif /* !IN_RING3 */
954
955
956/**
957 * Emulation of the invlpg instruction.
958 *
959 *
960 * @returns VBox status code.
961 *
962 * @param pVCpu The VMCPU handle.
963 * @param GCPtrPage Page to invalidate.
964 *
965 * @remark ASSUMES that the guest is updating before invalidating. This order
966 * isn't required by the CPU, so this is speculative and could cause
967 * trouble.
968 * @remark No TLB shootdown is done on any other VCPU as we assume that
969 * invlpg emulation is the *only* reason for calling this function.
970 * (The guest has to shoot down TLB entries on other CPUs itself)
971 * Currently true, but keep in mind!
972 *
973 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
974 */
975PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
976{
977#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
978 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
979 && PGM_SHW_TYPE != PGM_TYPE_EPT
980 int rc;
981 PVM pVM = pVCpu->CTX_SUFF(pVM);
982 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
983
984 Assert(PGMIsLockOwner(pVM));
985
986 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
987
988# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
989 if (pPool->cDirtyPages)
990 pgmPoolResetDirtyPages(pVM);
991# endif
992
993 /*
994 * Get the shadow PD entry and skip out if this PD isn't present.
995 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
996 */
997# if PGM_SHW_TYPE == PGM_TYPE_32BIT
998 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
999 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1000
1001 /* Fetch the pgm pool shadow descriptor. */
1002 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1003 Assert(pShwPde);
1004
1005# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1006 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1007 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1008
1009 /* If the shadow PDPE isn't present, then skip the invalidate. */
1010 if (!pPdptDst->a[iPdpt].n.u1Present)
1011 {
1012 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1013 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1014 return VINF_SUCCESS;
1015 }
1016
1017 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1018 PPGMPOOLPAGE pShwPde = NULL;
1019 PX86PDPAE pPDDst;
1020
1021 /* Fetch the pgm pool shadow descriptor. */
1022 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1023 AssertRCSuccessReturn(rc, rc);
1024 Assert(pShwPde);
1025
1026 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1027 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1028
1029# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1030 /* PML4 */
1031 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1032 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1033 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1034 PX86PDPAE pPDDst;
1035 PX86PDPT pPdptDst;
1036 PX86PML4E pPml4eDst;
1037 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1038 if (rc != VINF_SUCCESS)
1039 {
1040 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1041 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1042 return VINF_SUCCESS;
1043 }
1044 Assert(pPDDst);
1045
1046 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1047 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1048
1049 if (!pPdpeDst->n.u1Present)
1050 {
1051 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1052 return VINF_SUCCESS;
1053 }
1054
1055 /* Fetch the pgm pool shadow descriptor. */
1056 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1057 Assert(pShwPde);
1058
1059# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1060
1061 const SHWPDE PdeDst = *pPdeDst;
1062 if (!PdeDst.n.u1Present)
1063 {
1064 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1065 return VINF_SUCCESS;
1066 }
1067
1068# if defined(IN_RC)
1069 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1070 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1071# endif
1072
1073 /*
1074 * Get the guest PD entry and calc big page.
1075 */
1076# if PGM_GST_TYPE == PGM_TYPE_32BIT
1077 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1078 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1079 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1080# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1081 unsigned iPDSrc = 0;
1082# if PGM_GST_TYPE == PGM_TYPE_PAE
1083 X86PDPE PdpeSrc;
1084 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1085# else /* AMD64 */
1086 PX86PML4E pPml4eSrc;
1087 X86PDPE PdpeSrc;
1088 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1089# endif
1090 GSTPDE PdeSrc;
1091
1092 if (pPDSrc)
1093 PdeSrc = pPDSrc->a[iPDSrc];
1094 else
1095 PdeSrc.u = 0;
1096# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1097
1098# if PGM_GST_TYPE == PGM_TYPE_32BIT
1099 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1100# else
1101 const bool fIsBigPage = PdeSrc.b.u1Size;
1102# endif
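 /* Note: for 32-bit guests the PDE PS bit only takes effect when CR4.PSE is
 enabled, hence the CPUMIsGuestPageSizeExtEnabled check above; PAE and AMD64
 guests always honour the PS bit. */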
1103
1104# ifdef IN_RING3
1105 /*
1106 * If a CR3 Sync is pending we may ignore the invalidate page operation
1107 * depending on the kind of sync and if it's a global page or not.
1108 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1109 */
1110# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1111 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1112 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1113 && fIsBigPage
1114 && PdeSrc.b.u1Global
1115 )
1116 )
1117# else
1118 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1119# endif
1120 {
1121 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1122 return VINF_SUCCESS;
1123 }
1124# endif /* IN_RING3 */
1125
1126 /*
1127 * Deal with the Guest PDE.
1128 */
1129 rc = VINF_SUCCESS;
1130 if (PdeSrc.n.u1Present)
1131 {
1132 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1133 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1134# ifndef PGM_WITHOUT_MAPPING
1135 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1136 {
1137 /*
1138 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1139 */
1140 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1141 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1142 pgmLock(pVM);
1143 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1144 pgmUnlock(pVM);
1145 }
1146 else
1147# endif /* !PGM_WITHOUT_MAPPING */
1148 if (!fIsBigPage)
1149 {
1150 /*
1151 * 4KB - page.
1152 */
1153 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1154 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1155
1156# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1157 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1158 if (pShwPage->cModifications)
1159 pShwPage->cModifications = 1;
1160# endif
1161
1162# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1163 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
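 /* (A 32-bit guest page table covers 4 MB while a PAE shadow page table only
 covers 2 MB, so each guest PT is backed by two shadow PTs and the low bit
 of iPDDst picks the half. E.g. guest PDE 3 covers 0x00C00000-0x00FFFFFF,
 which maps to shadow PAE PD entries 6 and 7.) */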
1164 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1165# endif
1166 if (pShwPage->GCPhys == GCPhys)
1167 {
1168# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1169 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1170 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1171 if (pPT->a[iPTEDst].n.u1Present)
1172 {
1173 /* This is very unlikely with caching/monitoring enabled. */
1174 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1175 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1176 }
1177# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1178 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1179 if (RT_SUCCESS(rc))
1180 rc = VINF_SUCCESS;
1181# endif
1182 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1183 PGM_INVL_PG(pVCpu, GCPtrPage);
1184 }
1185 else
1186 {
1187 /*
1188 * The page table address changed.
1189 */
1190 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1191 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1192 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1193 ASMAtomicWriteSize(pPdeDst, 0);
1194 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1195 PGM_INVL_VCPU_TLBS(pVCpu);
1196 }
1197 }
1198 else
1199 {
1200 /*
1201 * 2/4MB - page.
1202 */
1203 /* Before freeing the page, check if anything really changed. */
1204 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1205 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1206# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1207 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1208 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1209# endif
1210 if ( pShwPage->GCPhys == GCPhys
1211 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1212 {
1213 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1214 /** @todo PAT */
1215 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1216 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1217 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1218 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1219 {
1220 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1221 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1222# if defined(IN_RC)
1223 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1224 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1225# endif
1226 return VINF_SUCCESS;
1227 }
1228 }
1229
1230 /*
1231 * Ok, the page table is present and it's been changed in the guest.
1232 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1233 * We could do this for some flushes in GC too, but we need an algorithm for
1234 * deciding which 4MB pages contain code likely to be executed very soon.
1235 */
1236 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1237 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1238 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1239 ASMAtomicWriteSize(pPdeDst, 0);
1240 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1241 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1242 }
1243 }
1244 else
1245 {
1246 /*
1247 * Page directory is not present, mark shadow PDE not present.
1248 */
1249 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1250 {
1251 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1252 ASMAtomicWriteSize(pPdeDst, 0);
1253 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1254 PGM_INVL_PG(pVCpu, GCPtrPage);
1255 }
1256 else
1257 {
1258 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1259 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1260 }
1261 }
1262# if defined(IN_RC)
1263 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1264 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1265# endif
1266 return rc;
1267
1268#else /* guest real and protected mode */
1269 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1270 return VINF_SUCCESS;
1271#endif
1272}
1273
1274
1275/**
1276 * Update the tracking of shadowed pages.
1277 *
1278 * @param pVCpu The VMCPU handle.
1279 * @param pShwPage The shadow page.
1280 * @param HCPhys The physical page that is being dereferenced.
1281 */
1282DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1283{
1284 PVM pVM = pVCpu->CTX_SUFF(pVM);
1285
1286 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1287 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1288
1289 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1290 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1291 * 2. write protect all shadowed pages. I.e. implement caching.
1292 */
1293 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1294
1295 /*
1296 * Find the guest address.
1297 */
1298 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1299 pRam;
1300 pRam = pRam->CTX_SUFF(pNext))
1301 {
1302 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1303 while (iPage-- > 0)
1304 {
1305 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1306 {
1307 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1308
1309 Assert(pShwPage->cPresent);
1310 Assert(pPool->cPresent);
1311 pShwPage->cPresent--;
1312 pPool->cPresent--;
1313
1314 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1315 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1316 return;
1317 }
1318 }
1319 }
1320
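 /* Reaching this point means HCPhys was not found in any registered RAM range,
 which would indicate corrupted reference tracking; hence the fatal assertion
 loop below. */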
1321 for (;;)
1322 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1323}
1324
1325
1326/**
1327 * Update the tracking of shadowed pages.
1328 *
1329 * @param pVCpu The VMCPU handle.
1330 * @param pShwPage The shadow page.
1331 * @param u16 The top 16 bits of the pPage->HCPhys.
1332 * @param pPage Pointer to the guest page. This will be modified.
1333 * @param iPTDst The index into the shadow table.
1334 */
1335DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1336{
1337 PVM pVM = pVCpu->CTX_SUFF(pVM);
1338 /*
1339 * Just deal with the simple first time here.
1340 */
1341 if (!u16)
1342 {
1343 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1344 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1345 }
1346 else
1347 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
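 /* The 16-bit tracking word presumably packs a reference count together with
 the owning pool page index (PGMPOOL_TD_MAKE above); pgmPoolTrackPhysExtAddref
 handles pages that already have one or more shadow references. */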
1348
1349 /* write back */
1350 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1351 PGM_PAGE_SET_TRACKING(pPage, u16);
1352
1353 /* update statistics. */
1354 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1355 pShwPage->cPresent++;
1356 if (pShwPage->iFirstPresent > iPTDst)
1357 pShwPage->iFirstPresent = iPTDst;
1358}
1359
1360
1361/**
1362 * Creates a 4K shadow page for a guest page.
1363 *
1364 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1365 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1366 * will be mapped in this function.
1367 *
1368 * @param pVCpu The VMCPU handle.
1369 * @param pPteDst Destination page table entry.
1370 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1371 * Can safely assume that only the flags are being used.
1372 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1373 * @param pShwPage Pointer to the shadow page.
1374 * @param iPTDst The index into the shadow table.
1375 *
1376 * @remark Not used for 2/4MB pages!
1377 */
1378DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1379{
1380 if (PteSrc.n.u1Present)
1381 {
1382 PVM pVM = pVCpu->CTX_SUFF(pVM);
1383
1384# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1385 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1386 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1387 if (pShwPage->fDirty)
1388 {
1389 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1390 PX86PTPAE pGstPT;
1391
1392 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1393 pGstPT->a[iPTDst].u = PteSrc.u;
1394 }
1395# endif
1396 /*
1397 * Find the ram range.
1398 */
1399 PPGMPAGE pPage;
1400 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1401 if (RT_SUCCESS(rc))
1402 {
1403#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1404 /* Try make the page writable if necessary. */
1405 if ( PteSrc.n.u1Write
1406 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1407# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1408 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1409# endif
1410 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1411 {
1412 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1413 AssertRC(rc);
1414 }
1415#endif
1416
1417 /** @todo investigate PWT, PCD and PAT. */
1418 /*
1419 * Make page table entry.
1420 */
1421 SHWPTE PteDst;
1422 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1423 {
1424 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1425 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1426 {
1427#if PGM_SHW_TYPE == PGM_TYPE_EPT
1428 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1429 PteDst.n.u1Present = 1;
1430 PteDst.n.u1Execute = 1;
1431 PteDst.n.u1IgnorePAT = 1;
1432 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1433 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1434#else
1435 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1436 | PGM_PAGE_GET_HCPHYS(pPage);
1437#endif
1438 }
1439 else
1440 {
1441 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1442 PteDst.u = 0;
1443 }
1444 /** @todo count these two kinds. */
1445 }
1446 else
1447 {
1448#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1449 /*
1450 * If the page or page directory entry is not marked accessed,
1451 * we mark the page not present.
1452 */
1453 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1454 {
1455 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1456 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1457 PteDst.u = 0;
1458 }
1459 else
1460 /*
1461 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1462 * when the page is modified.
1463 */
1464 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1465 {
1466 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1467 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1468 | PGM_PAGE_GET_HCPHYS(pPage)
1469 | PGM_PTFLAGS_TRACK_DIRTY;
1470 }
1471 else
1472#endif
1473 {
1474 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1475#if PGM_SHW_TYPE == PGM_TYPE_EPT
1476 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1477 PteDst.n.u1Present = 1;
1478 PteDst.n.u1Write = 1;
1479 PteDst.n.u1Execute = 1;
1480 PteDst.n.u1IgnorePAT = 1;
1481 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1482 /* PteDst.n.u1Size = 0 */
1483#else
1484 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1485 | PGM_PAGE_GET_HCPHYS(pPage);
1486#endif
1487 }
1488 }
1489
1490 /*
1491 * Make sure only allocated pages are mapped writable.
1492 */
1493 if ( PteDst.n.u1Write
1494 && PteDst.n.u1Present
1495 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1496 {
1497 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1498 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1499 }
1500
1501 /*
1502 * Keep user track up to date.
1503 */
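        /* I.e. take a reference when the entry becomes present, and drop the reference on
           the previously mapped physical page when the entry is repointed or cleared, so
           the page tracking data stays accurate. */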
1504 if (PteDst.n.u1Present)
1505 {
1506 if (!pPteDst->n.u1Present)
1507 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1508 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1509 {
1510 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1511 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1512 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1513 }
1514 }
1515 else if (pPteDst->n.u1Present)
1516 {
1517 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1518 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1519 }
1520
1521 /*
1522 * Update statistics and commit the entry.
1523 */
1524#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1525 if (!PteSrc.n.u1Global)
1526 pShwPage->fSeenNonGlobal = true;
1527#endif
1528 ASMAtomicWriteSize(pPteDst, PteDst.u);
1529 }
1530 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1531 /** @todo count these. */
1532 }
1533 else
1534 {
1535 /*
1536 * Page not-present.
1537 */
1538 Log2(("SyncPageWorker: page not present in Pte\n"));
1539 /* Keep user track up to date. */
1540 if (pPteDst->n.u1Present)
1541 {
1542 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1543 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1544 }
1545 ASMAtomicWriteSize(pPteDst, 0);
1546 /** @todo count these. */
1547 }
1548}
1549
1550
1551/**
1552 * Syncs a guest OS page.
1553 *
1554 * There are no conflicts at this point, neither is there any need for
1555 * page table allocations.
1556 *
1557 * @returns VBox status code.
1558 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1559 * @param pVCpu The VMCPU handle.
1560 * @param PdeSrc Page directory entry of the guest.
1561 * @param GCPtrPage Guest context page address.
1562 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1563 * @param uErr Fault error (X86_TRAP_PF_*).
1564 */
1565PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1566{
1567 PVM pVM = pVCpu->CTX_SUFF(pVM);
1568 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1569 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1570
1571 Assert(PGMIsLockOwner(pVM));
1572
1573#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1574 || PGM_GST_TYPE == PGM_TYPE_PAE \
1575 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1576 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1577 && PGM_SHW_TYPE != PGM_TYPE_EPT
1578
1579 /*
1580 * Assert preconditions.
1581 */
1582 Assert(PdeSrc.n.u1Present);
1583 Assert(cPages);
1584# if 0 /* rarely useful; leave for debugging. */
1585 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1586# endif
1587
1588 /*
1589 * Get the shadow PDE, find the shadow page table in the pool.
1590 */
1591# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1592 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1593 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1594
1595 /* Fetch the pgm pool shadow descriptor. */
1596 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1597 Assert(pShwPde);
1598
1599# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1600 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1601 PPGMPOOLPAGE pShwPde = NULL;
1602 PX86PDPAE pPDDst;
1603
1604 /* Fetch the pgm pool shadow descriptor. */
1605 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1606 AssertRCSuccessReturn(rc2, rc2);
1607 Assert(pShwPde);
1608
1609 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1610 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1611
1612# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1613 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1614 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1615 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1616 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1617
1618 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1619 AssertRCSuccessReturn(rc2, rc2);
1620 Assert(pPDDst && pPdptDst);
1621 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1622# endif
1623 SHWPDE PdeDst = *pPdeDst;
1624
1625 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1626 if (!PdeDst.n.u1Present)
1627 {
1628 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1629 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1630 return VINF_SUCCESS; /* force the instruction to be executed again. */
1631 }
1632
1633 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1634 Assert(pShwPage);
1635
1636# if PGM_GST_TYPE == PGM_TYPE_AMD64
1637 /* Fetch the pgm pool shadow descriptor. */
1638 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1639 Assert(pShwPde);
1640# endif
1641
1642# if defined(IN_RC)
1643 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1644 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1645# endif
1646
1647 /*
1648 * Check that the page is present and that the shadow PDE isn't out of sync.
1649 */
1650# if PGM_GST_TYPE == PGM_TYPE_32BIT
1651 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1652# else
1653 const bool fBigPage = PdeSrc.b.u1Size;
1654# endif
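    /* (For 32-bit guests 4MB pages additionally require CR4.PSE, hence the CPUM check
       above; PAE and AMD64 guests always support 2MB pages.) */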
1655 RTGCPHYS GCPhys;
1656 if (!fBigPage)
1657 {
1658 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1659# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1660 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1661 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1662# endif
1663 }
1664 else
1665 {
1666 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1667# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1668 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1669 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1670# endif
1671 }
1672 if ( pShwPage->GCPhys == GCPhys
1673 && PdeSrc.n.u1Present
1674 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1675 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1676# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1677 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1678# endif
1679 )
1680 {
1681 /*
1682 * Check that the PDE is marked accessed already.
1683 * Since we set the accessed bit *before* getting here on a #PF, this
1684 * check is only meant for dealing with non-#PF'ing paths.
1685 */
1686 if (PdeSrc.n.u1Accessed)
1687 {
1688 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1689 if (!fBigPage)
1690 {
1691 /*
1692 * 4KB Page - Map the guest page table.
1693 */
1694 PGSTPT pPTSrc;
1695 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1696 if (RT_SUCCESS(rc))
1697 {
1698# ifdef PGM_SYNC_N_PAGES
1699 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1700 if ( cPages > 1
1701 && !(uErr & X86_TRAP_PF_P)
1702 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1703 {
1704 /*
1705 * This code path is currently only taken when the caller is PGMTrap0eHandler
1706 * for non-present pages!
1707 *
1708 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1709 * deal with locality.
1710 */
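                    /* Example (assuming PGM_SYNC_NR_PAGES is 8): a fault on PT entry 2 syncs
                       entries 0..5, a fault on entry 200 syncs entries 196..203; the window
                       is clamped to the bounds of the shadow page table below. */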
1711 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1712# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1713 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1714 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1715# else
1716 const unsigned offPTSrc = 0;
1717# endif
1718 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1719 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1720 iPTDst = 0;
1721 else
1722 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1723 for (; iPTDst < iPTDstEnd; iPTDst++)
1724 {
1725 if (!pPTDst->a[iPTDst].n.u1Present)
1726 {
1727 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1728 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1729 NOREF(GCPtrCurPage);
1730#ifndef IN_RING0
1731 /*
1732 * Assuming kernel code will be marked as supervisor (and not as user level code
1733 * executed using a conforming code selector) and as read-only.
1734 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1735 */
1736 PPGMPAGE pPage;
1737 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1738 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1739 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1740 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1741 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1742 )
1743#endif /* else: CSAM not active */
1744 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1745 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1746 GCPtrCurPage, PteSrc.n.u1Present,
1747 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1748 PteSrc.n.u1User & PdeSrc.n.u1User,
1749 (uint64_t)PteSrc.u,
1750 (uint64_t)pPTDst->a[iPTDst].u,
1751 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1752 }
1753 }
1754 }
1755 else
1756# endif /* PGM_SYNC_N_PAGES */
1757 {
1758 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1759 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1760 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1761 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1762 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1763 GCPtrPage, PteSrc.n.u1Present,
1764 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1765 PteSrc.n.u1User & PdeSrc.n.u1User,
1766 (uint64_t)PteSrc.u,
1767 (uint64_t)pPTDst->a[iPTDst].u,
1768 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1769 }
1770 }
1771 else /* MMIO or invalid page: emulated in #PF handler. */
1772 {
1773 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1774 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1775 }
1776 }
1777 else
1778 {
1779 /*
1780 * 4/2MB page - lazy syncing shadow 4K pages.
1781 * (There are many causes of getting here, it's no longer only CSAM.)
1782 */
1783 /* Calculate the GC physical address of this 4KB shadow page. */
1784 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1785 /* Find ram range. */
1786 PPGMPAGE pPage;
1787 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1788 if (RT_SUCCESS(rc))
1789 {
1790# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1791 /* Try to make the page writable if necessary. */
1792 if ( PdeSrc.n.u1Write
1793 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1794# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1795 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1796# endif
1797 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1798 {
1799 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1800 AssertRC(rc);
1801 }
1802# endif
1803
1804 /*
1805 * Make shadow PTE entry.
1806 */
1807 SHWPTE PteDst;
1808 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1809 | PGM_PAGE_GET_HCPHYS(pPage);
1810 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1811 {
1812 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1813 PteDst.n.u1Write = 0;
1814 else
1815 PteDst.u = 0;
1816 }
1817 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1818 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1819 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1820
1821 /* Make sure only allocated pages are mapped writable. */
1822 if ( PteDst.n.u1Write
1823 && PteDst.n.u1Present
1824 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1825 {
1826 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1827 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1828 }
1829
1830 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1831
1832 /*
1833 * If the page is not flagged as dirty and is writable, then make it read-only
1834 * at PD level, so we can set the dirty bit when the page is modified.
1835 *
1836 * ASSUMES that page access handlers are implemented on page table entry level.
1837 * Thus we will first catch the dirty access and set PDE.D and restart. If
1838 * there is an access handler, we'll trap again and let it work on the problem.
1839 */
1840 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1841 * As for invlpg, it simply frees the whole shadow PT.
1842 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1843 if ( !PdeSrc.b.u1Dirty
1844 && PdeSrc.b.u1Write)
1845 {
1846 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1847 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1848 PdeDst.n.u1Write = 0;
1849 }
1850 else
1851 {
1852 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1853 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1854 }
1855 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1856 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1857 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1858 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1859 }
1860 else
1861 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1862 }
1863# if defined(IN_RC)
1864 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1865 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1866# endif
1867 return VINF_SUCCESS;
1868 }
1869 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1870 }
1871 else
1872 {
1873 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1874 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1875 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1876 }
1877
1878 /*
1879 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1880 * Yea, I'm lazy.
1881 */
1882 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1883 ASMAtomicWriteSize(pPdeDst, 0);
1884
1885# if defined(IN_RC)
1886 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1887 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1888# endif
1889 PGM_INVL_VCPU_TLBS(pVCpu);
1890 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1891
1892#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1893 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1894 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1895 && !defined(IN_RC)
1896
1897# ifdef PGM_SYNC_N_PAGES
1898 /*
1899 * Get the shadow PDE, find the shadow page table in the pool.
1900 */
1901# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1902 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1903
1904# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1905 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1906
1907# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1908 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1909 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1910 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1911 X86PDEPAE PdeDst;
1912 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1913
1914 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1915 AssertRCSuccessReturn(rc, rc);
1916 Assert(pPDDst && pPdptDst);
1917 PdeDst = pPDDst->a[iPDDst];
1918# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1919 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1920 PEPTPD pPDDst;
1921 EPTPDE PdeDst;
1922
1923 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1924 if (rc != VINF_SUCCESS)
1925 {
1926 AssertRC(rc);
1927 return rc;
1928 }
1929 Assert(pPDDst);
1930 PdeDst = pPDDst->a[iPDDst];
1931# endif
1932 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1933 if (!PdeDst.n.u1Present)
1934 {
1935 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
1936 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1937 return VINF_SUCCESS; /* force the instruction to be executed again. */
1938 }
1939
1940 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
1941 if (PdeDst.n.u1Size)
1942 {
1943 Assert(HWACCMIsNestedPagingActive(pVM));
1944 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
1945 return VINF_SUCCESS;
1946 }
1947
1948 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1949 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1950
1951 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1952 if ( cPages > 1
1953 && !(uErr & X86_TRAP_PF_P)
1954 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1955 {
1956 /*
1957 * This code path is currently only taken when the caller is PGMTrap0eHandler
1958 * for non-present pages!
1959 *
1960 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1961 * deal with locality.
1962 */
1963 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1964 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1965 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1966 iPTDst = 0;
1967 else
1968 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1969 for (; iPTDst < iPTDstEnd; iPTDst++)
1970 {
1971 if (!pPTDst->a[iPTDst].n.u1Present)
1972 {
1973 GSTPTE PteSrc;
1974
1975 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1976
1977 /* Fake the page table entry */
1978 PteSrc.u = GCPtrCurPage;
1979 PteSrc.n.u1Present = 1;
1980 PteSrc.n.u1Dirty = 1;
1981 PteSrc.n.u1Accessed = 1;
1982 PteSrc.n.u1Write = 1;
1983 PteSrc.n.u1User = 1;
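                    /* For guests running without paging, guest-physical equals guest-virtual,
                       so the faked PTE simply maps GCPtrCurPage 1:1 with full access and the
                       A/D bits pre-set. */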
1984
1985 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1986
1987 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1988 GCPtrCurPage, PteSrc.n.u1Present,
1989 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1990 PteSrc.n.u1User & PdeSrc.n.u1User,
1991 (uint64_t)PteSrc.u,
1992 (uint64_t)pPTDst->a[iPTDst].u,
1993 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1994
1995 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
1996 break;
1997 }
1998 else
1999 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2000 }
2001 }
2002 else
2003# endif /* PGM_SYNC_N_PAGES */
2004 {
2005 GSTPTE PteSrc;
2006 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2007 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2008
2009 /* Fake the page table entry */
2010 PteSrc.u = GCPtrCurPage;
2011 PteSrc.n.u1Present = 1;
2012 PteSrc.n.u1Dirty = 1;
2013 PteSrc.n.u1Accessed = 1;
2014 PteSrc.n.u1Write = 1;
2015 PteSrc.n.u1User = 1;
2016 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2017
2018 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2019 GCPtrPage, PteSrc.n.u1Present,
2020 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2021 PteSrc.n.u1User & PdeSrc.n.u1User,
2022 (uint64_t)PteSrc.u,
2023 (uint64_t)pPTDst->a[iPTDst].u,
2024 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2025 }
2026 return VINF_SUCCESS;
2027
2028#else
2029 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2030 return VERR_INTERNAL_ERROR;
2031#endif
2032}
2033
2034
2035#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2036/**
2037 * Investigate page fault and handle write protection page faults caused by
2038 * dirty bit tracking.
2039 *
2040 * @returns VBox status code.
2041 * @param pVCpu The VMCPU handle.
2042 * @param uErr Page fault error code.
2043 * @param pPdeSrc Guest page directory entry.
2044 * @param GCPtrPage Guest context page address.
2045 */
2046PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2047{
2048 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2049 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2050 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2051# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2052 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2053# endif
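 /* A write fault can only be a genuine protection fault if it originated in user mode
    or if CR0.WP also write protects supervisor accesses; likewise an instruction fetch
    fault (X86_TRAP_PF_ID) is only relevant when the guest has NX enabled. */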
2054 unsigned uPageFaultLevel;
2055 int rc;
2056 PVM pVM = pVCpu->CTX_SUFF(pVM);
2057
2058 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2059
2060# if PGM_GST_TYPE == PGM_TYPE_PAE \
2061 || PGM_GST_TYPE == PGM_TYPE_AMD64
2062
2063# if PGM_GST_TYPE == PGM_TYPE_AMD64
2064 PX86PML4E pPml4eSrc;
2065 PX86PDPE pPdpeSrc;
2066
2067 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2068 Assert(pPml4eSrc);
2069
2070 /*
2071 * Real page fault? (PML4E level)
2072 */
2073 if ( (uErr & X86_TRAP_PF_RSVD)
2074 || !pPml4eSrc->n.u1Present
2075 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2076 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2077 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2078 )
2079 {
2080 uPageFaultLevel = 0;
2081 goto l_UpperLevelPageFault;
2082 }
2083 Assert(pPdpeSrc);
2084
2085# else /* PAE */
2086 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2087# endif /* PAE */
2088
2089 /*
2090 * Real page fault? (PDPE level)
2091 */
2092 if ( (uErr & X86_TRAP_PF_RSVD)
2093 || !pPdpeSrc->n.u1Present
2094# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2095 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2096 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2097 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2098# endif
2099 )
2100 {
2101 uPageFaultLevel = 1;
2102 goto l_UpperLevelPageFault;
2103 }
2104# endif
2105
2106 /*
2107 * Real page fault? (PDE level)
2108 */
2109 if ( (uErr & X86_TRAP_PF_RSVD)
2110 || !pPdeSrc->n.u1Present
2111 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2112# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2113 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2114# endif
2115 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2116 {
2117 uPageFaultLevel = 2;
2118 goto l_UpperLevelPageFault;
2119 }
2120
2121 /*
2122 * First check the easy case where the page directory has been marked read-only to track
2123 * the dirty bit of an emulated BIG page
2124 */
2125 if ( pPdeSrc->b.u1Size
2126# if PGM_GST_TYPE == PGM_TYPE_32BIT
2127 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2128# endif
2129 )
2130 {
2131 /* Mark guest page directory as accessed */
2132# if PGM_GST_TYPE == PGM_TYPE_AMD64
2133 pPml4eSrc->n.u1Accessed = 1;
2134 pPdpeSrc->lm.u1Accessed = 1;
2135# endif
2136 pPdeSrc->b.u1Accessed = 1;
2137
2138 /*
2139 * Only write protection page faults are relevant here.
2140 */
2141 if (fWriteFault)
2142 {
2143 /* Mark guest page directory as dirty (BIG page only). */
2144 pPdeSrc->b.u1Dirty = 1;
2145 }
2146 return VINF_SUCCESS;
2147 }
2148 /* else: 4KB page table */
2149
2150 /*
2151 * Map the guest page table.
2152 */
2153 PGSTPT pPTSrc;
2154 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2155 if (RT_SUCCESS(rc))
2156 {
2157 /*
2158 * Real page fault?
2159 */
2160 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2161 const GSTPTE PteSrc = *pPteSrc;
2162 if ( !PteSrc.n.u1Present
2163 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2164# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2165 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2166# endif
2167 || (fUserLevelFault && !PteSrc.n.u1User)
2168 )
2169 {
2170 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2171 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2172
2173 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2174 * See the 2nd case above as well.
2175 */
2176 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2177 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2178
2179 return VINF_EM_RAW_GUEST_TRAP;
2180 }
2181 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2182
2183 /*
2184 * Set the accessed bits in the page directory and the page table.
2185 */
2186# if PGM_GST_TYPE == PGM_TYPE_AMD64
2187 pPml4eSrc->n.u1Accessed = 1;
2188 pPdpeSrc->lm.u1Accessed = 1;
2189# endif
2190 pPdeSrc->n.u1Accessed = 1;
2191 pPteSrc->n.u1Accessed = 1;
2192
2193 /*
2194 * Only write protection page faults are relevant here.
2195 */
2196 if (fWriteFault)
2197 {
2198 /* Write access, so mark guest entry as dirty. */
2199# ifdef VBOX_WITH_STATISTICS
2200 if (!pPteSrc->n.u1Dirty)
2201 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2202 else
2203 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2204# endif
2205
2206 pPteSrc->n.u1Dirty = 1;
2207 }
2208 return VINF_SUCCESS;
2209 }
2210 AssertRC(rc);
2211 return rc;
2212
2213
2214l_UpperLevelPageFault:
2215 /*
2216 * Pagefault detected while checking the PML4E, PDPE or PDE.
2217 * Single exit handler to get rid of duplicate code paths.
2218 */
2219 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2220 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2221
2222 if ( 1
2223# if PGM_GST_TYPE == PGM_TYPE_AMD64
2224 && pPml4eSrc->n.u1Present
2225# endif
2226# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2227 && pPdpeSrc->n.u1Present
2228# endif
2229 && pPdeSrc->n.u1Present)
2230 {
2231 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2232 if ( pPdeSrc->b.u1Size
2233# if PGM_GST_TYPE == PGM_TYPE_32BIT
2234 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2235# endif
2236 )
2237 {
2238 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2239 }
2240 else
2241 {
2242 /*
2243 * Map the guest page table.
2244 */
2245 PGSTPT pPTSrc2;
2246 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2247 if (RT_SUCCESS(rc))
2248 {
2249 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2250 if (pPteSrc->n.u1Present)
2251 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2252 }
2253 AssertRC(rc);
2254 }
2255 }
2256 return VINF_EM_RAW_GUEST_TRAP;
2257}
2258
2259/**
2260 * Handle dirty bit tracking faults.
2261 *
2262 * @returns VBox status code.
2263 * @param pVCpu The VMCPU handle.
2264 * @param uErr Page fault error code.
2265 * @param pPdeSrc Guest page directory entry.
2266 * @param pPdeDst Shadow page directory entry.
2267 * @param GCPtrPage Guest context page address.
2268 */
2269PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2270{
2271# if PGM_GST_TYPE == PGM_TYPE_32BIT
2272 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2273# else
2274 const bool fBigPagesSupported = true;
2275# endif
2276 PVM pVM = pVCpu->CTX_SUFF(pVM);
2277 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2278
2279 Assert(PGMIsLockOwner(pVM));
2280
2281 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2282 {
2283 if ( pPdeDst->n.u1Present
2284 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2285 {
2286 SHWPDE PdeDst = *pPdeDst;
2287
2288 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2289 Assert(pPdeSrc->b.u1Write);
2290
2291 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2292 * fault again and take this path to only invalidate the entry.
2293 */
2294 PdeDst.n.u1Write = 1;
2295 PdeDst.n.u1Accessed = 1;
2296 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2297 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2298 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2299 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2300 }
2301# ifdef IN_RING0
2302 else
2303 /* Check for stale TLB entry; only applies to the SMP guest case. */
2304 if ( pVM->cCpus > 1
2305 && pPdeDst->n.u1Write
2306 && pPdeDst->n.u1Accessed)
2307 {
2308 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2309 if (pShwPage)
2310 {
2311 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2312 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2313 if ( pPteDst->n.u1Present
2314 && pPteDst->n.u1Write)
2315 {
2316 /* Stale TLB entry. */
2317 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2318 PGM_INVL_PG(pVCpu, GCPtrPage);
2319 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2320 }
2321 }
2322 }
2323# endif /* IN_RING0 */
2324 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2325 }
2326
2327 /*
2328 * Map the guest page table.
2329 */
2330 PGSTPT pPTSrc;
2331 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2332 if (RT_SUCCESS(rc))
2333 {
2334 if (pPdeDst->n.u1Present)
2335 {
2336 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2337 const GSTPTE PteSrc = *pPteSrc;
2338#ifndef IN_RING0
2339 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2340 * Our individual shadow handlers will provide more information and force a fatal exit.
2341 */
2342 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2343 {
2344 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2345 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2346 }
2347#endif
2348 /*
2349 * Map shadow page table.
2350 */
2351 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2352 if (pShwPage)
2353 {
2354 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2355 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2356 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2357 {
2358 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2359 {
2360 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2361 SHWPTE PteDst = *pPteDst;
2362
2363 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2364 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2365
2366 Assert(pPteSrc->n.u1Write);
2367
2368 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2369 * fault again and take this path to only invalidate the entry.
2370 */
2371 if (RT_LIKELY(pPage))
2372 {
2373 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2374 {
2375 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2376 PteDst.n.u1Write = 0;
2377 }
2378 else
2379 {
2380 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2381 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2382 {
2383 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2384 AssertRC(rc);
2385 }
2386 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2387 PteDst.n.u1Write = 1;
2388 else
2389 PteDst.n.u1Write = 0;
2390 }
2391 }
2392 else
2393 PteDst.n.u1Write = 1;
2394
2395 PteDst.n.u1Dirty = 1;
2396 PteDst.n.u1Accessed = 1;
2397 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2398 ASMAtomicWriteSize(pPteDst, PteDst.u);
2399 PGM_INVL_PG(pVCpu, GCPtrPage);
2400 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2401 }
2402# ifdef IN_RING0
2403 else
2404 /* Check for stale TLB entry; only applies to the SMP guest case. */
2405 if ( pVM->cCpus > 1
2406 && pPteDst->n.u1Write == 1
2407 && pPteDst->n.u1Accessed == 1)
2408 {
2409 /* Stale TLB entry. */
2410 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2411 PGM_INVL_PG(pVCpu, GCPtrPage);
2412 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2413 }
2414# endif
2415 }
2416 }
2417 else
2418 AssertMsgFailed(("pgmPoolGetPage %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2419 }
2420 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2421 }
2422 AssertRC(rc);
2423 return rc;
2424}
2425#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2426
2427
2428/**
2429 * Sync a shadow page table.
2430 *
2431 * The shadow page table is not present. This includes the case where
2432 * there is a conflict with a mapping.
2433 *
2434 * @returns VBox status code.
2435 * @param pVCpu The VMCPU handle.
2436 * @param iPDSrc Page directory index.
2437 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2438 * Assume this is a temporary mapping.
2439 * @param GCPtrPage GC pointer to the page that caused the fault.
2440 */
2441PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2442{
2443 PVM pVM = pVCpu->CTX_SUFF(pVM);
2444 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2445
2446 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2447#if 0 /* rarely useful; leave for debugging. */
2448 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2449#endif
2450 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2451
2452 Assert(PGMIsLocked(pVM));
2453
2454#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2455 || PGM_GST_TYPE == PGM_TYPE_PAE \
2456 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2457 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2458 && PGM_SHW_TYPE != PGM_TYPE_EPT
2459
2460 int rc = VINF_SUCCESS;
2461
2462 /*
2463 * Validate input a little bit.
2464 */
2465 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2466# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2467 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2468 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2469
2470 /* Fetch the pgm pool shadow descriptor. */
2471 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2472 Assert(pShwPde);
2473
2474# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2475 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2476 PPGMPOOLPAGE pShwPde = NULL;
2477 PX86PDPAE pPDDst;
2478 PSHWPDE pPdeDst;
2479
2480 /* Fetch the pgm pool shadow descriptor. */
2481 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2482 AssertRCSuccessReturn(rc, rc);
2483 Assert(pShwPde);
2484
2485 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2486 pPdeDst = &pPDDst->a[iPDDst];
2487
2488# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2489 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2490 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2491 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2492 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2493 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2494 AssertRCSuccessReturn(rc, rc);
2495 Assert(pPDDst);
2496 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2497# endif
2498 SHWPDE PdeDst = *pPdeDst;
2499
2500# if PGM_GST_TYPE == PGM_TYPE_AMD64
2501 /* Fetch the pgm pool shadow descriptor. */
2502 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2503 Assert(pShwPde);
2504# endif
2505
2506# ifndef PGM_WITHOUT_MAPPINGS
2507 /*
2508 * Check for conflicts.
2509 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2510 * HC: Simply resolve the conflict.
2511 */
2512 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2513 {
2514 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2515# ifndef IN_RING3
2516 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2517 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2518 return VERR_ADDRESS_CONFLICT;
2519# else
2520 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2521 Assert(pMapping);
2522# if PGM_GST_TYPE == PGM_TYPE_32BIT
2523 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2524# elif PGM_GST_TYPE == PGM_TYPE_PAE
2525 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2526# else
2527 AssertFailed(); /* can't happen for amd64 */
2528# endif
2529 if (RT_FAILURE(rc))
2530 {
2531 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2532 return rc;
2533 }
2534 PdeDst = *pPdeDst;
2535# endif
2536 }
2537# endif /* !PGM_WITHOUT_MAPPINGS */
2538 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2539
2540# if defined(IN_RC)
2541 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2542 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2543# endif
2544
2545 /*
2546 * Sync page directory entry.
2547 */
2548 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2549 if (PdeSrc.n.u1Present)
2550 {
2551 /*
2552 * Allocate & map the page table.
2553 */
2554 PSHWPT pPTDst;
2555# if PGM_GST_TYPE == PGM_TYPE_32BIT
2556 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2557# else
2558 const bool fPageTable = !PdeSrc.b.u1Size;
2559# endif
2560 PPGMPOOLPAGE pShwPage;
2561 RTGCPHYS GCPhys;
2562 if (fPageTable)
2563 {
2564 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2565# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2566 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2567 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2568# endif
2569 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2570 }
2571 else
2572 {
2573 PGMPOOLACCESS enmAccess;
2574# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2575 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2576# else
2577 const bool fNoExecute = false;
2578# endif
2579
2580 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2581# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2582 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2583 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2584# endif
2585 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2586 if (PdeSrc.n.u1User)
2587 {
2588 if (PdeSrc.n.u1Write)
2589 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2590 else
2591 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2592 }
2593 else
2594 {
2595 if (PdeSrc.n.u1Write)
2596 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2597 else
2598 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2599 }
2600 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2601 }
2602 if (rc == VINF_SUCCESS)
2603 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2604 else if (rc == VINF_PGM_CACHED_PAGE)
2605 {
2606 /*
2607 * The PT was cached, just hook it up.
2608 */
2609 if (fPageTable)
2610 PdeDst.u = pShwPage->Core.Key
2611 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2612 else
2613 {
2614 PdeDst.u = pShwPage->Core.Key
2615 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2616 /* (see explanation and assumptions further down.) */
2617 if ( !PdeSrc.b.u1Dirty
2618 && PdeSrc.b.u1Write)
2619 {
2620 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2621 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2622 PdeDst.b.u1Write = 0;
2623 }
2624 }
2625 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2626# if defined(IN_RC)
2627 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2628# endif
2629 return VINF_SUCCESS;
2630 }
2631 else if (rc == VERR_PGM_POOL_FLUSHED)
2632 {
2633 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2634# if defined(IN_RC)
2635 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2636# endif
2637 return VINF_PGM_SYNC_CR3;
2638 }
2639 else
2640 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2641 PdeDst.u &= X86_PDE_AVL_MASK;
2642 PdeDst.u |= pShwPage->Core.Key;
2643
2644 /*
2645 * Page directory has been accessed (this is a fault situation, remember).
2646 */
2647 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2648 if (fPageTable)
2649 {
2650 /*
2651 * Page table - 4KB.
2652 *
2653 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2654 */
2655 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2656 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2657 PGSTPT pPTSrc;
2658 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2659 if (RT_SUCCESS(rc))
2660 {
2661 /*
2662 * Start by syncing the page directory entry so CSAM's TLB trick works.
2663 */
2664 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2665 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2666 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2667# if defined(IN_RC)
2668 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2669# endif
2670
2671 /*
2672 * Directory/page user or supervisor privilege: (same goes for read/write)
2673 *
2674 * Directory   Page    Combined
2675 *   U/S       U/S       U/S
2676 *    0         0         0
2677 *    0         1         0
2678 *    1         0         0
2679 *    1         1         1
2680 *
2681 * Simple AND operation. Table listed for completeness.
2682 *
2683 */
2684 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2685# ifdef PGM_SYNC_N_PAGES
2686 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2687 unsigned iPTDst = iPTBase;
2688 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2689 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2690 iPTDst = 0;
2691 else
2692 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2693# else /* !PGM_SYNC_N_PAGES */
2694 unsigned iPTDst = 0;
2695 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2696# endif /* !PGM_SYNC_N_PAGES */
2697# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2698 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2699 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2700# else
2701 const unsigned offPTSrc = 0;
2702# endif
2703 for (; iPTDst < iPTDstEnd; iPTDst++)
2704 {
2705 const unsigned iPTSrc = iPTDst + offPTSrc;
2706 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2707
2708 if (PteSrc.n.u1Present) /* we've already cleared it above */
2709 {
2710# ifndef IN_RING0
2711 /*
2712 * Assuming kernel code will be marked as supervisor (and not as user level code
2713 * executed using a conforming code selector) and as read-only.
2714 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2715 */
2716 PPGMPAGE pPage;
2717 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2718 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2719 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2720 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2721 )
2722# endif
2723 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2724 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2725 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2726 PteSrc.n.u1Present,
2727 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2728 PteSrc.n.u1User & PdeSrc.n.u1User,
2729 (uint64_t)PteSrc.u,
2730 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2731 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2732 }
2733 } /* for PTEs */
2734 }
2735 }
2736 else
2737 {
2738 /*
2739 * Big page - 2/4MB.
2740 *
2741 * We'll walk the ram range list in parallel and optimize lookups.
2742 * We will only sync one shadow page table at a time.
2743 */
2744 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2745
2746 /**
2747 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2748 */
2749
2750 /*
2751 * Start by syncing the page directory entry.
2752 */
2753 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2754 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2755
2756 /*
2757 * If the page is not flagged as dirty and is writable, then make it read-only
2758 * at PD level, so we can set the dirty bit when the page is modified.
2759 *
2760 * ASSUMES that page access handlers are implemented on page table entry level.
2761 * Thus we will first catch the dirty access and set PDE.D and restart. If
2762 * there is an access handler, we'll trap again and let it work on the problem.
2763 */
2764 /** @todo move the above stuff to a section in the PGM documentation. */
2765 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2766 if ( !PdeSrc.b.u1Dirty
2767 && PdeSrc.b.u1Write)
2768 {
2769 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2770 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2771 PdeDst.b.u1Write = 0;
2772 }
2773 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2774# if defined(IN_RC)
2775 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2776# endif
2777
2778 /*
2779 * Fill the shadow page table.
2780 */
2781 /* Get address and flags from the source PDE. */
2782 SHWPTE PteDstBase;
2783 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2784
2785 /* Loop thru the entries in the shadow PT. */
2786 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2787 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2788 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2789 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2790 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2791 unsigned iPTDst = 0;
2792 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2793 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2794 {
2795 /* Advance ram range list. */
2796 while (pRam && GCPhys > pRam->GCPhysLast)
2797 pRam = pRam->CTX_SUFF(pNext);
2798 if (pRam && GCPhys >= pRam->GCPhys)
2799 {
2800 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2801 do
2802 {
2803 /* Make shadow PTE. */
2804 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2805 SHWPTE PteDst;
2806
2807# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2808 /* Try to make the page writable if necessary. */
2809 if ( PteDstBase.n.u1Write
2810 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2811# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2812 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2813# endif
2814 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2815 {
2816 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2817 AssertRCReturn(rc, rc);
2818 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2819 break;
2820 }
2821# endif
2822
2823 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2824 {
2825 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2826 {
2827 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2828 PteDst.n.u1Write = 0;
2829 }
2830 else
2831 PteDst.u = 0;
2832 }
2833# ifndef IN_RING0
2834 /*
2835 * Assuming kernel code will be marked as supervisor and not as user level code executed
2836 * using a conforming code selector. Don't require read-only, as that would assume the
2837 * whole 4MB is code or read-only data; Linux enables write access for its large pages.
2838 */
2839 else if ( !PdeSrc.n.u1User
2840 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2841 PteDst.u = 0;
2842# endif
2843 else
2844 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2845
2846 /* Only map writable pages writable. */
2847 if ( PteDst.n.u1Write
2848 && PteDst.n.u1Present
2849 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2850 {
2851 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2852 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2853 }
2854
2855 if (PteDst.n.u1Present)
2856 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2857
2858 /* commit it */
2859 pPTDst->a[iPTDst] = PteDst;
2860 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2861 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2862 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2863
2864 /* advance */
2865 GCPhys += PAGE_SIZE;
2866 iHCPage++;
2867 iPTDst++;
2868 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2869 && GCPhys <= pRam->GCPhysLast);
2870 }
2871 else if (pRam)
2872 {
2873 Log(("Invalid pages at %RGp\n", GCPhys));
2874 do
2875 {
2876 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2877 GCPhys += PAGE_SIZE;
2878 iPTDst++;
2879 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2880 && GCPhys < pRam->GCPhys);
2881 }
2882 else
2883 {
2884 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2885 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2886 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2887 }
2888 } /* while more PTEs */
2889 } /* 4KB / 4MB */
2890 }
2891 else
2892 AssertRelease(!PdeDst.n.u1Present);
2893
2894 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2895 if (RT_FAILURE(rc))
2896 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2897 return rc;
2898
2899#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2900 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2901 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2902 && !defined(IN_RC)
2903
2904 /*
2905 * Validate input a little bit.
2906 */
2907 int rc = VINF_SUCCESS;
2908# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2909 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2910 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2911
2912 /* Fetch the pgm pool shadow descriptor. */
2913 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2914 Assert(pShwPde);
2915
2916# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2917 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2918 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2919 PX86PDPAE pPDDst;
2920 PSHWPDE pPdeDst;
2921
2922 /* Fetch the pgm pool shadow descriptor. */
2923 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2924 AssertRCSuccessReturn(rc, rc);
2925 Assert(pShwPde);
2926
2927 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2928 pPdeDst = &pPDDst->a[iPDDst];
2929
2930# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2931 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2932 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2933 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2934 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2935 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2936 AssertRCSuccessReturn(rc, rc);
2937 Assert(pPDDst);
2938 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2939
2940 /* Fetch the pgm pool shadow descriptor. */
2941 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2942 Assert(pShwPde);
2943
2944# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2945 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2946 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2947 PEPTPD pPDDst;
2948 PEPTPDPT pPdptDst;
2949
2950 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2951 if (rc != VINF_SUCCESS)
2952 {
2953 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2954 AssertRC(rc);
2955 return rc;
2956 }
2957 Assert(pPDDst);
2958 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2959
2960 /* Fetch the pgm pool shadow descriptor. */
2961 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2962 Assert(pShwPde);
2963# endif
2964 SHWPDE PdeDst = *pPdeDst;
2965
2966 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2967 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2968
2969# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
2970# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
2971 if (HWACCMIsNestedPagingActive(pVM))
2972# endif
2973 {
2974 PPGMPAGE pPage;
2975
2976 /* Check if we allocated a big page before for this 2 MB range. */
2977 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
2978 if (RT_SUCCESS(rc))
2979 {
2980 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2981
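            /* Three possibilities here: the 2 MB range is already backed by a large page,
               a previously disabled large page can be re-validated, or we try to allocate
               a fresh one; if HCPhys is still NIL afterwards we fall back to a normal
               shadow page table below. */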
2982 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
2983 {
2984 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
2985 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2986 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2987 }
2988 else
2989 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
2990 {
2991 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
2992 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
2993 if (RT_SUCCESS(rc))
2994 {
2995 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2996 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2997 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2998 }
2999 }
3000 else
3001 if (PGMIsUsingLargePages(pVM))
3002 {
3003 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3004 if (RT_SUCCESS(rc))
3005 {
3006 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3007 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3008 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3009 }
3010 else
3011 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3012 }
3013
3014 if (HCPhys != NIL_RTHCPHYS)
3015 {
3016 PdeDst.u &= X86_PDE_AVL_MASK;
3017 PdeDst.u |= HCPhys;
3018 PdeDst.n.u1Present = 1;
3019 PdeDst.n.u1Write = 1;
3020 PdeDst.b.u1Size = 1;
3021# if PGM_SHW_TYPE == PGM_TYPE_EPT
3022 PdeDst.n.u1Execute = 1;
3023 PdeDst.b.u1IgnorePAT = 1;
3024 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3025# else
3026 PdeDst.n.u1User = 1;
3027# endif
3028 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3029
3030 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3031 /* Add a reference to the first page only. */
3032 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3033
3034 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3035 return VINF_SUCCESS;
3036 }
3037 }
3038 }
3039 # endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3040
3041 GSTPDE PdeSrc;
3042 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3043 PdeSrc.n.u1Present = 1;
3044 PdeSrc.n.u1Write = 1;
3045 PdeSrc.n.u1Accessed = 1;
3046 PdeSrc.n.u1User = 1;
3047
3048 /*
3049 * Allocate & map the page table.
3050 */
3051 PSHWPT pPTDst;
3052 PPGMPOOLPAGE pShwPage;
3053 RTGCPHYS GCPhys;
3054
3055 /* Virtual address = physical address */
3056 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3057 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
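    /* Example (assuming SHW_PD_SHIFT is 21, i.e. PAE/AMD64/EPT shadow paging with 2 MB
       shadow PDEs): a fault at GCPtrPage 0x0012f678123 gives GCPhys 0x0012f678000 and a
       pool key of 0x0012f600000, so all pages in that 2 MB region share one shadow PT. */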
3058
3059 if ( rc == VINF_SUCCESS
3060 || rc == VINF_PGM_CACHED_PAGE)
3061 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3062 else
3063 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3064
3065 PdeDst.u &= X86_PDE_AVL_MASK;
3066 PdeDst.u |= pShwPage->Core.Key;
3067 PdeDst.n.u1Present = 1;
3068 PdeDst.n.u1Write = 1;
3069# if PGM_SHW_TYPE == PGM_TYPE_EPT
3070 PdeDst.n.u1Execute = 1;
3071# else
3072 PdeDst.n.u1User = 1;
3073 PdeDst.n.u1Accessed = 1;
3074# endif
3075 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3076
3077 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3078 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3079 return rc;
3080
3081#else
3082 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3083 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3084 return VERR_INTERNAL_ERROR;
3085#endif
3086}
3087
3088
3089
3090/**
3091 * Prefetch a page/set of pages.
3092 *
3093 * Typically used to sync commonly used pages before entering raw mode
3094 * after a CR3 reload.
3095 *
3096 * @returns VBox status code.
3097 * @param pVCpu The VMCPU handle.
3098 * @param GCPtrPage Page to prefetch.
3099 */
3100PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3101{
3102#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3103 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3104 /*
3105 * Check that all guest levels through the PDE are present, getting the
3106 * PD and PDE in the process.
3107 */
3108 int rc = VINF_SUCCESS;
3109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3110# if PGM_GST_TYPE == PGM_TYPE_32BIT
3111 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3113# elif PGM_GST_TYPE == PGM_TYPE_PAE
3114 unsigned iPDSrc;
3115 X86PDPE PdpeSrc;
3116 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3117 if (!pPDSrc)
3118 return VINF_SUCCESS; /* not present */
3119# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3120 unsigned iPDSrc;
3121 PX86PML4E pPml4eSrc;
3122 X86PDPE PdpeSrc;
3123 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3124 if (!pPDSrc)
3125 return VINF_SUCCESS; /* not present */
3126# endif
3127 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3128# else
3129 PGSTPD pPDSrc = NULL;
3130 const unsigned iPDSrc = 0;
3131 GSTPDE PdeSrc;
3132
3133 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3134 PdeSrc.n.u1Present = 1;
3135 PdeSrc.n.u1Write = 1;
3136 PdeSrc.n.u1Accessed = 1;
3137 PdeSrc.n.u1User = 1;
3138# endif
3139
3140 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3141 {
3142 PVM pVM = pVCpu->CTX_SUFF(pVM);
3143 pgmLock(pVM);
3144
3145# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3146 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3147# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3148 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3149 PX86PDPAE pPDDst;
3150 X86PDEPAE PdeDst;
3151# if PGM_GST_TYPE != PGM_TYPE_PAE
3152 X86PDPE PdpeSrc;
3153
3154 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3155 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3156# endif
3157 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3158 if (rc != VINF_SUCCESS)
3159 {
3160 pgmUnlock(pVM);
3161 AssertRC(rc);
3162 return rc;
3163 }
3164 Assert(pPDDst);
3165 PdeDst = pPDDst->a[iPDDst];
3166
3167# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3168 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3169 PX86PDPAE pPDDst;
3170 X86PDEPAE PdeDst;
3171
3172# if PGM_GST_TYPE == PGM_TYPE_PROT
3173 /* AMD-V nested paging */
3174 X86PML4E Pml4eSrc;
3175 X86PDPE PdpeSrc;
3176 PX86PML4E pPml4eSrc = &Pml4eSrc;
3177
3178 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3179 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3180 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3181# endif
3182
3183 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3184 if (rc != VINF_SUCCESS)
3185 {
3186 pgmUnlock(pVM);
3187 AssertRC(rc);
3188 return rc;
3189 }
3190 Assert(pPDDst);
3191 PdeDst = pPDDst->a[iPDDst];
3192# endif
3193 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3194 {
3195 if (!PdeDst.n.u1Present)
3196 {
3197 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3198 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3199 }
3200 else
3201 {
3202 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3203 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3204 * makes no sense to prefetch more than one page.
3205 */
3206 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3207 if (RT_SUCCESS(rc))
3208 rc = VINF_SUCCESS;
3209 }
3210 }
3211 pgmUnlock(pVM);
3212 }
3213 return rc;
3214
3215#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3216 return VINF_SUCCESS; /* ignore */
3217#endif
3218}
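
/*
 * Illustrative sketch, not part of the original file: a hypothetical caller of
 * the prefetch worker above. pVCpu and GCPtrPage are assumed to be a valid
 * VMCPU handle and a guest-context address supplied by the caller; the worker
 * takes the PGM lock itself and simply returns VINF_SUCCESS when there is
 * nothing to prefetch.
 */
#if 0 /* example only */
static int pgmBthExamplePrefetch(PVMCPU pVCpu, RTGCPTR GCPtrPage)
{
    int rc = PGM_BTH_NAME(PrefetchPage)(pVCpu, GCPtrPage);
    LogFlow(("pgmBthExamplePrefetch: %RGv -> %Rrc\n", GCPtrPage, rc));
    return rc;
}
#endif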
3219
3220
3221
3222
3223/**
3224 * Syncs a page during a PGMVerifyAccess() call.
3225 *
3226 * @returns VBox status code (informational included).
3227 * @param pVCpu The VMCPU handle.
3228 * @param GCPtrPage The address of the page to sync.
3229 * @param fPage The effective guest page flags.
3230 * @param uErr The trap error code.
3231 */
3232PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3233{
3234 PVM pVM = pVCpu->CTX_SUFF(pVM);
3235
3236 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3237
3238 Assert(!HWACCMIsNestedPagingActive(pVM));
3239#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3240 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3241
3242# ifndef IN_RING0
3243 if (!(fPage & X86_PTE_US))
3244 {
3245 /*
3246 * Mark this page as safe.
3247 */
3248 /** @todo not correct for pages that contain both code and data!! */
3249 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3250 CSAMMarkPage(pVM, GCPtrPage, true);
3251 }
3252# endif
3253
3254 /*
3255 * Get guest PD and index.
3256 */
3257# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3258# if PGM_GST_TYPE == PGM_TYPE_32BIT
3259 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3260 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3261# elif PGM_GST_TYPE == PGM_TYPE_PAE
3262 unsigned iPDSrc = 0;
3263 X86PDPE PdpeSrc;
3264 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3265
3266 if (!pPDSrc)
3267 {
3268 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3269 return VINF_EM_RAW_GUEST_TRAP;
3270 }
3271# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3272 unsigned iPDSrc;
3273 PX86PML4E pPml4eSrc;
3274 X86PDPE PdpeSrc;
3275 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3276 if (!pPDSrc)
3277 {
3278 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3279 return VINF_EM_RAW_GUEST_TRAP;
3280 }
3281# endif
3282# else
3283 PGSTPD pPDSrc = NULL;
3284 const unsigned iPDSrc = 0;
3285# endif
3286 int rc = VINF_SUCCESS;
3287
3288 pgmLock(pVM);
3289
3290 /*
3291 * First check if the shadow pd is present.
3292 */
3293# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3294 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3295# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3296 PX86PDEPAE pPdeDst;
3297 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3298 PX86PDPAE pPDDst;
3299# if PGM_GST_TYPE != PGM_TYPE_PAE
3300 X86PDPE PdpeSrc;
3301
3302 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3303 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3304# endif
3305 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3306 if (rc != VINF_SUCCESS)
3307 {
3308 pgmUnlock(pVM);
3309 AssertRC(rc);
3310 return rc;
3311 }
3312 Assert(pPDDst);
3313 pPdeDst = &pPDDst->a[iPDDst];
3314
3315# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3316 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3317 PX86PDPAE pPDDst;
3318 PX86PDEPAE pPdeDst;
3319
3320# if PGM_GST_TYPE == PGM_TYPE_PROT
3321 /* AMD-V nested paging */
3322 X86PML4E Pml4eSrc;
3323 X86PDPE PdpeSrc;
3324 PX86PML4E pPml4eSrc = &Pml4eSrc;
3325
3326 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3327 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3328 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3329# endif
3330
3331 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3332 if (rc != VINF_SUCCESS)
3333 {
3334 pgmUnlock(pVM);
3335 AssertRC(rc);
3336 return rc;
3337 }
3338 Assert(pPDDst);
3339 pPdeDst = &pPDDst->a[iPDDst];
3340# endif
3341
3342# if defined(IN_RC)
3343 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3344 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3345# endif
3346
3347 if (!pPdeDst->n.u1Present)
3348 {
3349 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3350 if (rc != VINF_SUCCESS)
3351 {
3352# if defined(IN_RC)
3353 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3354 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3355# endif
3356 pgmUnlock(pVM);
3357 AssertRC(rc);
3358 return rc;
3359 }
3360 }
3361
3362# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3363 /* Check for dirty bit fault */
3364 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3365 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3366 Log(("PGMVerifyAccess: success (dirty)\n"));
3367 else
3368 {
3369 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3370# else
3371 {
3372 GSTPDE PdeSrc;
3373 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3374 PdeSrc.n.u1Present = 1;
3375 PdeSrc.n.u1Write = 1;
3376 PdeSrc.n.u1Accessed = 1;
3377 PdeSrc.n.u1User = 1;
3378
3379# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3380 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3381 if (uErr & X86_TRAP_PF_US)
3382 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3383 else /* supervisor */
3384 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3385
3386 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3387 if (RT_SUCCESS(rc))
3388 {
3389 /* Page was successfully synced */
3390 Log2(("PGMVerifyAccess: success (sync)\n"));
3391 rc = VINF_SUCCESS;
3392 }
3393 else
3394 {
3395 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3396 rc = VINF_EM_RAW_GUEST_TRAP;
3397 }
3398 }
3399# if defined(IN_RC)
3400 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3401 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3402# endif
3403 pgmUnlock(pVM);
3404 return rc;
3405
3406#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3407
3408 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3409 return VERR_INTERNAL_ERROR;
3410#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3411}
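
/*
 * Illustrative sketch, not part of the original file: one way a hypothetical
 * caller could interpret the status codes produced by VerifyAccessSyncPage
 * above. GCPtrPage, fPage and uErr are assumed to come from the access
 * verification path.
 */
#if 0 /* example only */
static bool pgmBthExampleVerifyAccess(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
{
    int rc = PGM_BTH_NAME(VerifyAccessSyncPage)(pVCpu, GCPtrPage, fPage, uErr);
    if (rc == VINF_EM_RAW_GUEST_TRAP)
        return false;   /* the access really faults; reflect it to the guest */
    AssertRC(rc);       /* informational codes (e.g. VINF_SUCCESS) are fine */
    return RT_SUCCESS(rc);
}
#endif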
3412
3413#undef MY_STAM_COUNTER_INC
3414#define MY_STAM_COUNTER_INC(a) do { } while (0)
3415
3416
3417/**
3418 * Syncs the paging hierarchy starting at CR3.
3419 *
3420 * @returns VBox status code, no specials.
3421 * @param pVCpu The VMCPU handle.
3422 * @param cr0 Guest context CR0 register
3423 * @param cr3 Guest context CR3 register
3424 * @param cr4 Guest context CR4 register
3425 * @param fGlobal Including global page directories or not
3426 */
3427PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3428{
3429 PVM pVM = pVCpu->CTX_SUFF(pVM);
3430
3431 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
3432 fGlobal = true; /* Change this CR3 reload to be a global one. */
3433
3434 LogFlow(("SyncCR3 %d\n", fGlobal));
3435
3436#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3437
3438 pgmLock(pVM);
3439# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3440 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3441 if (pPool->cDirtyPages)
3442 pgmPoolResetDirtyPages(pVM);
3443# endif
3444
3445 /*
3446 * Update page access handlers.
3447 * Virtual handlers are always flushed, while physical handlers are only flushed on demand.
3448 * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
3449 * have to look into that later because it will have a bad influence on performance.
3450 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3451 * bird: Yes, but that won't work for aliases.
3452 */
3453 /** @todo this MUST go away. See #1557. */
3454 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3455 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3456 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3457 pgmUnlock(pVM);
3458#endif /* !NESTED && !EPT */
3459
3460#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3461 /*
3462 * Nested / EPT - almost no work.
3463 */
3464 /** @todo check if this is really necessary; the call does it as well... */
3465 HWACCMFlushTLB(pVCpu);
3466 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3467 return VINF_SUCCESS;
3468
3469#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3470 /*
3471 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3472 * out the shadow parts when the guest modifies its tables.
3473 */
3474 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3475 return VINF_SUCCESS;
3476
3477#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3478
3479# ifndef PGM_WITHOUT_MAPPINGS
3480 /*
3481 * Check for and resolve conflicts with our guest mappings if they
3482 * are enabled and not fixed.
3483 */
3484 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3485 {
3486 int rc = pgmMapResolveConflicts(pVM);
3487 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3488 if (rc == VINF_PGM_SYNC_CR3)
3489 {
3490 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3491 return VINF_PGM_SYNC_CR3;
3492 }
3493 }
3494# else
3495 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3496# endif
3497 return VINF_SUCCESS;
3498#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3499}
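
/*
 * Illustrative sketch, not part of the original file: the kind of call the
 * resync logic above expects. cr0/cr3/cr4 are assumed to have been read from
 * the guest context by the caller; fGlobal is forced to true inside the
 * worker whenever VMCPU_FF_PGM_SYNC_CR3 is pending.
 */
#if 0 /* example only */
static void pgmBthExampleSyncCR3(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4)
{
    int rc = PGM_BTH_NAME(SyncCR3)(pVCpu, cr0, cr3, cr4, false /*fGlobal*/);
    AssertMsg(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3, ("rc=%Rrc\n", rc));
}
#endif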
3500
3501
3502
3503
3504#ifdef VBOX_STRICT
3505#ifdef IN_RC
3506# undef AssertMsgFailed
3507# define AssertMsgFailed Log
3508#endif
3509#ifdef IN_RING3
3510# include <VBox/dbgf.h>
3511
3512/**
3513 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3514 *
3515 * @returns VBox status code (VINF_SUCCESS).
3516 * @param cr3 The root of the hierarchy.
3517 * @param cr4 The cr4; only the PAE and PSE flags are currently used.
3518 * @param fLongMode Set if long mode, false if not long mode.
3519 * @param cMaxDepth Number of levels to dump.
3520 * @param pHlp Pointer to the output functions.
3521 */
3522RT_C_DECLS_BEGIN
3523VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3524RT_C_DECLS_END
3525
3526#endif
3527
3528/**
3529 * Checks that the shadow page table is in sync with the guest one.
3530 *
3531 * @returns The number of errors.
3532 * @param pVM The virtual machine.
3533 * @param pVCpu The VMCPU handle.
3534 * @param cr3 Guest context CR3 register
3535 * @param cr4 Guest context CR4 register
3536 * @param GCPtr Where to start. Defaults to 0.
3537 * @param cb How much to check. Defaults to everything.
3538 */
3539PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3540{
3541#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3542 return 0;
3543#else
3544 unsigned cErrors = 0;
3545 PVM pVM = pVCpu->CTX_SUFF(pVM);
3546 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3547
3548#if PGM_GST_TYPE == PGM_TYPE_PAE
3549 /** @todo currently broken; crashes below somewhere */
3550 AssertFailed();
3551#endif
3552
3553#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3554 || PGM_GST_TYPE == PGM_TYPE_PAE \
3555 || PGM_GST_TYPE == PGM_TYPE_AMD64
3556
3557# if PGM_GST_TYPE == PGM_TYPE_32BIT
3558 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3559# else
3560 bool fBigPagesSupported = true;
3561# endif
3562 PPGMCPU pPGM = &pVCpu->pgm.s;
3563 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3564 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3565# ifndef IN_RING0
3566 RTHCPHYS HCPhys; /* general usage. */
3567# endif
3568 int rc;
3569
3570 /*
3571 * Check that the Guest CR3 and all its mappings are correct.
3572 */
3573 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3574 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3575 false);
3576# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3577# if PGM_GST_TYPE == PGM_TYPE_32BIT
3578 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3579# else
3580 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3581# endif
3582 AssertRCReturn(rc, 1);
3583 HCPhys = NIL_RTHCPHYS;
3584 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3585 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3586# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3587 pgmGstGet32bitPDPtr(pPGM);
3588 RTGCPHYS GCPhys;
3589 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3590 AssertRCReturn(rc, 1);
3591 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3592# endif
3593# endif /* !IN_RING0 */
3594
3595 /*
3596 * Get and check the Shadow CR3.
3597 */
3598# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3599 unsigned cPDEs = X86_PG_ENTRIES;
3600 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3601# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3602# if PGM_GST_TYPE == PGM_TYPE_32BIT
3603 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3604# else
3605 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3606# endif
3607 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3608# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3609 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3610 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3611# endif
3612 if (cb != ~(RTGCPTR)0)
3613 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3614
3615/** @todo call the other two PGMAssert*() functions. */
3616
3617# if PGM_GST_TYPE == PGM_TYPE_AMD64
3618 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3619
3620 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3621 {
3622 PPGMPOOLPAGE pShwPdpt = NULL;
3623 PX86PML4E pPml4eSrc;
3624 PX86PML4E pPml4eDst;
3625 RTGCPHYS GCPhysPdptSrc;
3626
3627 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3628 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3629
3630 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3631 if (!pPml4eDst->n.u1Present)
3632 {
3633 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3634 continue;
3635 }
3636
3637 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3638 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3639
3640 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3641 {
3642 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3643 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3644 cErrors++;
3645 continue;
3646 }
3647
3648 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3649 {
3650 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3651 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3652 cErrors++;
3653 continue;
3654 }
3655
3656 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3657 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3658 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3659 {
3660 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3661 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3662 cErrors++;
3663 continue;
3664 }
3665# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3666 {
3667# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3668
3669# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3670 /*
3671 * Check the PDPTEs too.
3672 */
3673 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3674
3675 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3676 {
3677 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3678 PPGMPOOLPAGE pShwPde = NULL;
3679 PX86PDPE pPdpeDst;
3680 RTGCPHYS GCPhysPdeSrc;
3681# if PGM_GST_TYPE == PGM_TYPE_PAE
3682 X86PDPE PdpeSrc;
3683 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3684 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3685# else
3686 PX86PML4E pPml4eSrcIgn;
3687 X86PDPE PdpeSrc;
3688 PX86PDPT pPdptDst;
3689 PX86PDPAE pPDDst;
3690 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3691
3692 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3693 if (rc != VINF_SUCCESS)
3694 {
3695 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3696 GCPtr += 512 * _2M;
3697 continue; /* next PDPTE */
3698 }
3699 Assert(pPDDst);
3700# endif
3701 Assert(iPDSrc == 0);
3702
3703 pPdpeDst = &pPdptDst->a[iPdpt];
3704
3705 if (!pPdpeDst->n.u1Present)
3706 {
3707 GCPtr += 512 * _2M;
3708 continue; /* next PDPTE */
3709 }
3710
3711 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3712 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3713
3714 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3715 {
3716 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3717 GCPtr += 512 * _2M;
3718 cErrors++;
3719 continue;
3720 }
3721
3722 if (GCPhysPdeSrc != pShwPde->GCPhys)
3723 {
3724# if PGM_GST_TYPE == PGM_TYPE_AMD64
3725 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3726# else
3727 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3728# endif
3729 GCPtr += 512 * _2M;
3730 cErrors++;
3731 continue;
3732 }
3733
3734# if PGM_GST_TYPE == PGM_TYPE_AMD64
3735 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3736 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3737 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3738 {
3739 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3740 GCPtr += 512 * _2M;
3741 cErrors++;
3742 continue;
3743 }
3744# endif
3745
3746# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3747 {
3748# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3749# if PGM_GST_TYPE == PGM_TYPE_32BIT
3750 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3751# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3752 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3753# endif
3754# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3755 /*
3756 * Iterate the shadow page directory.
3757 */
3758 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3759 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3760
3761 for (;
3762 iPDDst < cPDEs;
3763 iPDDst++, GCPtr += cIncrement)
3764 {
3765# if PGM_SHW_TYPE == PGM_TYPE_PAE
3766 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3767# else
3768 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3769# endif
3770 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3771 {
3772 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3773 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3774 {
3775 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3776 cErrors++;
3777 continue;
3778 }
3779 }
3780 else if ( (PdeDst.u & X86_PDE_P)
3781 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3782 )
3783 {
3784 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3785 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3786 if (!pPoolPage)
3787 {
3788 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3789 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3790 cErrors++;
3791 continue;
3792 }
3793 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3794
3795 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3796 {
3797 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3798 GCPtr, (uint64_t)PdeDst.u));
3799 cErrors++;
3800 }
3801
3802 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3803 {
3804 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3805 GCPtr, (uint64_t)PdeDst.u));
3806 cErrors++;
3807 }
3808
3809 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3810 if (!PdeSrc.n.u1Present)
3811 {
3812 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3813 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3814 cErrors++;
3815 continue;
3816 }
3817
3818 if ( !PdeSrc.b.u1Size
3819 || !fBigPagesSupported)
3820 {
3821 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3822# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3823 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3824# endif
3825 }
3826 else
3827 {
3828# if PGM_GST_TYPE == PGM_TYPE_32BIT
3829 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3830 {
3831 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3832 GCPtr, (uint64_t)PdeSrc.u));
3833 cErrors++;
3834 continue;
3835 }
3836# endif
3837 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3838# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3839 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3840# endif
3841 }
3842
3843 if ( pPoolPage->enmKind
3844 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3845 {
3846 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3847 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3848 cErrors++;
3849 }
3850
3851 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3852 if (!pPhysPage)
3853 {
3854 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3855 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3856 cErrors++;
3857 continue;
3858 }
3859
3860 if (GCPhysGst != pPoolPage->GCPhys)
3861 {
3862 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3863 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3864 cErrors++;
3865 continue;
3866 }
3867
3868 if ( !PdeSrc.b.u1Size
3869 || !fBigPagesSupported)
3870 {
3871 /*
3872 * Page Table.
3873 */
3874 const GSTPT *pPTSrc;
3875 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3876 if (RT_FAILURE(rc))
3877 {
3878 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3879 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3880 cErrors++;
3881 continue;
3882 }
3883 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3884 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3885 {
3886 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3887 // (This problem will go away when/if we shadow multiple CR3s.)
3888 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3889 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3890 cErrors++;
3891 continue;
3892 }
3893 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3894 {
3895 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3896 GCPtr, (uint64_t)PdeDst.u));
3897 cErrors++;
3898 continue;
3899 }
3900
3901 /* iterate the page table. */
3902# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3903 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3904 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3905# else
3906 const unsigned offPTSrc = 0;
3907# endif
3908 for (unsigned iPT = 0, off = 0;
3909 iPT < RT_ELEMENTS(pPTDst->a);
3910 iPT++, off += PAGE_SIZE)
3911 {
3912 const SHWPTE PteDst = pPTDst->a[iPT];
3913
3914 /* skip not-present entries. */
3915 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3916 continue;
3917 Assert(PteDst.n.u1Present);
3918
3919 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3920 if (!PteSrc.n.u1Present)
3921 {
3922# ifdef IN_RING3
3923 PGMAssertHandlerAndFlagsInSync(pVM);
3924 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3925# endif
3926 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3927 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3928 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3929 cErrors++;
3930 continue;
3931 }
3932
3933 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3934# if 1 /** @todo sync accessed bit properly... */
3935 fIgnoreFlags |= X86_PTE_A;
3936# endif
3937
3938 /* match the physical addresses */
3939 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3940 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3941
3942# ifdef IN_RING3
3943 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3944 if (RT_FAILURE(rc))
3945 {
3946 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3947 {
3948 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3949 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3950 cErrors++;
3951 continue;
3952 }
3953 }
3954 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3955 {
3956 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3957 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3958 cErrors++;
3959 continue;
3960 }
3961# endif
3962
3963 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3964 if (!pPhysPage)
3965 {
3966# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3967 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3968 {
3969 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3970 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3971 cErrors++;
3972 continue;
3973 }
3974# endif
3975 if (PteDst.n.u1Write)
3976 {
3977 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3978 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3979 cErrors++;
3980 }
3981 fIgnoreFlags |= X86_PTE_RW;
3982 }
3983 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3984 {
3985 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3986 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3987 cErrors++;
3988 continue;
3989 }
3990
3991 /* flags */
3992 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3993 {
3994 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3995 {
3996 if (PteDst.n.u1Write)
3997 {
3998 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3999 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4000 cErrors++;
4001 continue;
4002 }
4003 fIgnoreFlags |= X86_PTE_RW;
4004 }
4005 else
4006 {
4007 if (PteDst.n.u1Present)
4008 {
4009 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4010 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4011 cErrors++;
4012 continue;
4013 }
4014 fIgnoreFlags |= X86_PTE_P;
4015 }
4016 }
4017 else
4018 {
4019 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4020 {
4021 if (PteDst.n.u1Write)
4022 {
4023 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4024 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4025 cErrors++;
4026 continue;
4027 }
4028 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4029 {
4030 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4031 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4032 cErrors++;
4033 continue;
4034 }
4035 if (PteDst.n.u1Dirty)
4036 {
4037 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4038 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4039 cErrors++;
4040 }
4041# if 0 /** @todo sync access bit properly... */
4042 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4043 {
4044 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4045 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4046 cErrors++;
4047 }
4048 fIgnoreFlags |= X86_PTE_RW;
4049# else
4050 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4051# endif
4052 }
4053 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4054 {
4055 /* access bit emulation (not implemented). */
4056 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4057 {
4058 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4059 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4060 cErrors++;
4061 continue;
4062 }
4063 if (!PteDst.n.u1Accessed)
4064 {
4065 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4066 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4067 cErrors++;
4068 }
4069 fIgnoreFlags |= X86_PTE_P;
4070 }
4071# ifdef DEBUG_sandervl
4072 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4073# endif
4074 }
4075
4076 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4077 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4078 )
4079 {
4080 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4081 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4082 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4083 cErrors++;
4084 continue;
4085 }
4086 } /* foreach PTE */
4087 }
4088 else
4089 {
4090 /*
4091 * Big Page.
4092 */
4093 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4094 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4095 {
4096 if (PdeDst.n.u1Write)
4097 {
4098 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4099 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4100 cErrors++;
4101 continue;
4102 }
4103 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4104 {
4105 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4106 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4107 cErrors++;
4108 continue;
4109 }
4110# if 0 /** @todo sync access bit properly... */
4111 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4112 {
4113 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4114 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4115 cErrors++;
4116 }
4117 fIgnoreFlags |= X86_PTE_RW;
4118# else
4119 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4120# endif
4121 }
4122 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4123 {
4124 /* access bit emulation (not implemented). */
4125 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4126 {
4127 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4128 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4129 cErrors++;
4130 continue;
4131 }
4132 if (!PdeDst.n.u1Accessed)
4133 {
4134 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4135 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4136 cErrors++;
4137 }
4138 fIgnoreFlags |= X86_PTE_P;
4139 }
4140
4141 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4142 {
4143 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4144 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4145 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4146 cErrors++;
4147 }
4148
4149 /* iterate the page table. */
4150 for (unsigned iPT = 0, off = 0;
4151 iPT < RT_ELEMENTS(pPTDst->a);
4152 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4153 {
4154 const SHWPTE PteDst = pPTDst->a[iPT];
4155
4156 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4157 {
4158 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4159 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4160 cErrors++;
4161 }
4162
4163 /* skip not-present entries. */
4164 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4165 continue;
4166
4167 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4168
4169 /* match the physical addresses */
4170 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4171
4172# ifdef IN_RING3
4173 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4174 if (RT_FAILURE(rc))
4175 {
4176 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4177 {
4178 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4179 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4180 cErrors++;
4181 }
4182 }
4183 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4184 {
4185 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4186 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4187 cErrors++;
4188 continue;
4189 }
4190# endif
4191 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4192 if (!pPhysPage)
4193 {
4194# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4195 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4196 {
4197 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4198 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4199 cErrors++;
4200 continue;
4201 }
4202# endif
4203 if (PteDst.n.u1Write)
4204 {
4205 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4206 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4207 cErrors++;
4208 }
4209 fIgnoreFlags |= X86_PTE_RW;
4210 }
4211 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4212 {
4213 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4214 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4215 cErrors++;
4216 continue;
4217 }
4218
4219 /* flags */
4220 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4221 {
4222 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4223 {
4224 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4225 {
4226 if (PteDst.n.u1Write)
4227 {
4228 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4229 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4230 cErrors++;
4231 continue;
4232 }
4233 fIgnoreFlags |= X86_PTE_RW;
4234 }
4235 }
4236 else
4237 {
4238 if (PteDst.n.u1Present)
4239 {
4240 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4241 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4242 cErrors++;
4243 continue;
4244 }
4245 fIgnoreFlags |= X86_PTE_P;
4246 }
4247 }
4248
4249 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4250 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4251 )
4252 {
4253 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4254 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4255 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4256 cErrors++;
4257 continue;
4258 }
4259 } /* for each PTE */
4260 }
4261 }
4262 /* not present */
4263
4264 } /* for each PDE */
4265
4266 } /* for each PDPTE */
4267
4268 } /* for each PML4E */
4269
4270# ifdef DEBUG
4271 if (cErrors)
4272 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4273# endif
4274
4275#endif /* GST == 32BIT, PAE or AMD64 */
4276 return cErrors;
4277
4278#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4279}
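
/*
 * Illustrative sketch, not part of the original file: a strict-build check
 * that would flag any shadow/guest mismatch reported by AssertCR3 above.
 * cr3 and cr4 are assumed to be the current guest register values; passing
 * GCPtr=0 and cb=~0 checks the whole address space as documented above.
 */
# if 0 /* example only */
static void pgmBthExampleAssertCR3(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4)
{
    unsigned cErrors = PGM_BTH_NAME(AssertCR3)(pVCpu, cr3, cr4, 0, ~(RTGCPTR)0);
    AssertMsg(cErrors == 0, ("cErrors=%u\n", cErrors));
}
# endif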
4280#endif /* VBOX_STRICT */
4281
4282
4283/**
4284 * Sets up the CR3 for shadow paging.
4285 *
4286 * @returns Strict VBox status code.
4287 * @retval VINF_SUCCESS.
4288 *
4289 * @param pVCpu The VMCPU handle.
4290 * @param GCPhysCR3 The physical address in the CR3 register.
4291 */
4292PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4293{
4294 PVM pVM = pVCpu->CTX_SUFF(pVM);
4295
4296 /* Update guest paging info. */
4297#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4298 || PGM_GST_TYPE == PGM_TYPE_PAE \
4299 || PGM_GST_TYPE == PGM_TYPE_AMD64
4300
4301 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4302
4303 /*
4304 * Map the page CR3 points at.
4305 */
4306 RTHCPTR HCPtrGuestCR3;
4307 RTHCPHYS HCPhysGuestCR3;
4308 pgmLock(pVM);
4309 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4310 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4311 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4312 /** @todo this needs some reworking wrt. locking. */
4313# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4314 HCPtrGuestCR3 = NIL_RTHCPTR;
4315 int rc = VINF_SUCCESS;
4316# else
4317 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4318# endif
4319 pgmUnlock(pVM);
4320 if (RT_SUCCESS(rc))
4321 {
4322 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4323 if (RT_SUCCESS(rc))
4324 {
4325# ifdef IN_RC
4326 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4327# endif
4328# if PGM_GST_TYPE == PGM_TYPE_32BIT
4329 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4330# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4331 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4332# endif
4333 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4334
4335# elif PGM_GST_TYPE == PGM_TYPE_PAE
4336 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4337 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4338# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4339 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4340# endif
4341 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4342 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4343
4344 /*
4345 * Map the 4 PDs too.
4346 */
4347 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4348 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4349 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4350 {
4351 if (pGuestPDPT->a[i].n.u1Present)
4352 {
4353 RTHCPTR HCPtr;
4354 RTHCPHYS HCPhys;
4355 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4356 pgmLock(pVM);
4357 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4358 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4359 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4360# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4361 HCPtr = NIL_RTHCPTR;
4362 int rc2 = VINF_SUCCESS;
4363# else
4364 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4365# endif
4366 pgmUnlock(pVM);
4367 if (RT_SUCCESS(rc2))
4368 {
4369 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4370 AssertRCReturn(rc, rc);
4371
4372 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4373# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4374 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4375# endif
4376 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4377 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4378# ifdef IN_RC
4379 PGM_INVL_PG(pVCpu, GCPtr);
4380# endif
4381 continue;
4382 }
4383 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4384 }
4385
4386 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4387# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4388 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4389# endif
4390 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4391 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4392# ifdef IN_RC
4393 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4394# endif
4395 }
4396
4397# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4398 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4399# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4400 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4401# endif
4402# endif
4403 }
4404 else
4405 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4406 }
4407 else
4408 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4409
4410#else /* prot/real stub */
4411 int rc = VINF_SUCCESS;
4412#endif
4413
4414 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4415# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4416 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4417 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4418 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4419 && PGM_GST_TYPE != PGM_TYPE_PROT))
4420
4421 Assert(!HWACCMIsNestedPagingActive(pVM));
4422
4423 /*
4424 * Update the shadow root page as well since that's not fixed.
4425 */
4426 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4427 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4428 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4429 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4430 PPGMPOOLPAGE pNewShwPageCR3;
4431
4432 pgmLock(pVM);
4433
4434# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4435 if (pPool->cDirtyPages)
4436 pgmPoolResetDirtyPages(pVM);
4437# endif
4438
4439 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4440 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4441 AssertFatalRC(rc);
4442 rc = VINF_SUCCESS;
4443
4444# ifdef IN_RC
4445 /*
4446 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4447 * state will be inconsistent! Flush important things now while
4448 * we still can and then make sure there are no ring-3 calls.
4449 */
4450 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4451 VMMRZCallRing3Disable(pVCpu);
4452# endif
4453
4454 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4455 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4456 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4457# ifdef IN_RING0
4458 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4459 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4460# elif defined(IN_RC)
4461 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4462 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4463# else
4464 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4465 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4466# endif
4467
4468# ifndef PGM_WITHOUT_MAPPINGS
4469 /*
4470 * Apply all hypervisor mappings to the new CR3.
4471 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4472 * make sure we check for conflicts in the new CR3 root.
4473 */
4474# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4475 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4476# endif
4477 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4478 AssertRCReturn(rc, rc);
4479# endif
4480
4481 /* Set the current hypervisor CR3. */
4482 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4483 SELMShadowCR3Changed(pVM, pVCpu);
4484
4485# ifdef IN_RC
4486 /* NOTE: The state is consistent again. */
4487 VMMRZCallRing3Enable(pVCpu);
4488# endif
4489
4490 /* Clean up the old CR3 root. */
4491 if ( pOldShwPageCR3
4492 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4493 {
4494 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4495# ifndef PGM_WITHOUT_MAPPINGS
4496 /* Remove the hypervisor mappings from the shadow page table. */
4497 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4498# endif
4499 /* Mark the page as unlocked; allow flushing again. */
4500 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4501
4502 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4503 }
4504 pgmUnlock(pVM);
4505# endif
4506
4507 return rc;
4508}
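
/*
 * Illustrative sketch, not part of the original file: deriving the argument
 * for MapCR3 above from a raw guest CR3 value. uGuestCr3 is a hypothetical
 * caller-supplied register value; GST_CR3_PAGE_MASK is the mode-specific
 * mask already used by the worker.
 */
#if 0 /* example only */
static int pgmBthExampleMapCR3(PVMCPU pVCpu, uint64_t uGuestCr3)
{
    /* Only the page-frame bits of CR3 are relevant for mapping the root. */
    RTGCPHYS GCPhysCR3 = (RTGCPHYS)(uGuestCr3 & GST_CR3_PAGE_MASK);
    return PGM_BTH_NAME(MapCR3)(pVCpu, GCPhysCR3);
}
#endif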
4509
4510/**
4511 * Unmaps the shadow CR3.
4512 *
4513 * @returns VBox status, no specials.
4514 * @param pVCpu The VMCPU handle.
4515 */
4516PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4517{
4518 LogFlow(("UnmapCR3\n"));
4519
4520 int rc = VINF_SUCCESS;
4521 PVM pVM = pVCpu->CTX_SUFF(pVM);
4522
4523 /*
4524 * Update guest paging info.
4525 */
4526#if PGM_GST_TYPE == PGM_TYPE_32BIT
4527 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4528# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4529 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4530# endif
4531 pVCpu->pgm.s.pGst32BitPdRC = 0;
4532
4533#elif PGM_GST_TYPE == PGM_TYPE_PAE
4534 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4535# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4536 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4537# endif
4538 pVCpu->pgm.s.pGstPaePdptRC = 0;
4539 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4540 {
4541 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4542# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4543 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4544# endif
4545 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4546 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4547 }
4548
4549#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4550 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4551# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4552 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4553# endif
4554
4555#else /* prot/real mode stub */
4556 /* nothing to do */
4557#endif
4558
4559#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4560 /*
4561 * Update shadow paging info.
4562 */
4563# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4564 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4565 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4566
4567# if PGM_GST_TYPE != PGM_TYPE_REAL
4568 Assert(!HWACCMIsNestedPagingActive(pVM));
4569# endif
4570
4571 pgmLock(pVM);
4572
4573# ifndef PGM_WITHOUT_MAPPINGS
4574 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4575 /* Remove the hypervisor mappings from the shadow page table. */
4576 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4577# endif
4578
4579 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4580 {
4581 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4582
4583 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4584
4585# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4586 if (pPool->cDirtyPages)
4587 pgmPoolResetDirtyPages(pVM);
4588# endif
4589
4590 /* Mark the page as unlocked; allow flushing again. */
4591 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4592
4593 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4594 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4595 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4596 pVCpu->pgm.s.pShwPageCR3RC = 0;
4597 pVCpu->pgm.s.iShwUser = 0;
4598 pVCpu->pgm.s.iShwUserTable = 0;
4599 }
4600 pgmUnlock(pVM);
4601# endif
4602#endif /* !IN_RC*/
4603
4604 return rc;
4605}
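
/*
 * Illustrative sketch, not part of the original file: the order a hypothetical
 * CR3-switch path could use with the two workers defined above, unmapping the
 * old guest/shadow CR3 before mapping the new one. GCPhysNewCR3 is assumed to
 * be the page-aligned physical address of the new guest CR3.
 */
#if 0 /* example only */
static int pgmBthExampleSwitchCR3(PVMCPU pVCpu, RTGCPHYS GCPhysNewCR3)
{
    int rc = PGM_BTH_NAME(UnmapCR3)(pVCpu);
    AssertRCReturn(rc, rc);
    return PGM_BTH_NAME(MapCR3)(pVCpu, GCPhysNewCR3);
}
#endif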