VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 27203

Last change on this file since 27203 was 27203, checked in by vboxsync, 15 years ago

Double dereference removed

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 196.1 KB
1/* $Id: PGMAllBth.h 27203 2010-03-09 10:48:40Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
34PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
35PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
37#ifdef VBOX_STRICT
38PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
39#endif
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
42PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
43RT_C_DECLS_END
44
45
46/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
47#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
48# error "Invalid combination; PAE guest implies PAE shadow"
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
53# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
57 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
58# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
59#endif
60
61#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
62 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
63# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
64#endif
65
66
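/*
 * Note: This header is a template.  It is included once for each supported
 * guest/shadow paging mode pair, with PGM_GST_TYPE and PGM_SHW_TYPE defined
 * by the includer; PGM_BTH_DECL and PGM_BTH_NAME then give every
 * instantiation its own uniquely named set of functions.  The #error checks
 * above reject the mode combinations that cannot occur.
 */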
67#ifndef IN_RING3
68/**
69 * #PF Handler for raw-mode guest execution.
70 *
71 * @returns VBox status code (appropriate for trap handling and GC return).
72 *
73 * @param pVCpu VMCPU Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 * @param pfLockTaken PGM lock taken here or not (out)
78 */
79PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
80{
81 PVM pVM = pVCpu->CTX_SUFF(pVM);
82
83 *pfLockTaken = false;
84
85# if defined(IN_RC) && defined(VBOX_STRICT)
86 PGMDynCheckLocks(pVM);
87# endif
88
89# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
90 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
91 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
92
93# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
94 /*
95 * Hide the instruction fetch trap indicator for now.
96 */
97 /** @todo NXE will change this and we must fix NXE in the switcher too! */
98 if (uErr & X86_TRAP_PF_ID)
99 {
100 uErr &= ~X86_TRAP_PF_ID;
101 TRPMSetErrorCode(pVCpu, uErr);
102 }
103# endif
104
105 /*
106 * Get PDs.
107 */
108 int rc;
109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
110# if PGM_GST_TYPE == PGM_TYPE_32BIT
111 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
113
114# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
115
116# if PGM_GST_TYPE == PGM_TYPE_PAE
117 unsigned iPDSrc = 0; /* initialized to shut up gcc */
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120
121# elif PGM_GST_TYPE == PGM_TYPE_AMD64
122 unsigned iPDSrc = 0; /* initialized to shut up gcc */
123 PX86PML4E pPml4eSrc;
124 X86PDPE PdpeSrc;
125 PGSTPD pPDSrc;
126
127 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
128 Assert(pPml4eSrc);
129# endif
130
131 /* Quick check for a valid guest trap. (PAE & AMD64) */
132 if (!pPDSrc)
133 {
134# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
135 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
136# else
137 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# endif
139 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
140 TRPMSetErrorCode(pVCpu, uErr);
141 return VINF_EM_RAW_GUEST_TRAP;
142 }
143# endif
144
145# else /* !PGM_WITH_PAGING */
146 PGSTPD pPDSrc = NULL;
147 const unsigned iPDSrc = 0;
148# endif /* !PGM_WITH_PAGING */
149
150 /* First check for a genuine guest page fault. */
151# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
152 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
153 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
154 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
155 if (rc == VINF_EM_RAW_GUEST_TRAP)
156 {
157 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
158 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
159 return rc;
160 }
161# endif /* PGM_WITH_PAGING */
162
163 /* Take the big lock now. */
164 *pfLockTaken = true;
165 pgmLock(pVM);
166
167 /* Fetch the guest PDE */
168# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
169 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
170# else
171 GSTPDE PdeSrc;
172 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
173 PdeSrc.n.u1Present = 1;
174 PdeSrc.n.u1Write = 1;
175 PdeSrc.n.u1Accessed = 1;
176 PdeSrc.n.u1User = 1;
177# endif
178
179# if PGM_SHW_TYPE == PGM_TYPE_32BIT
180 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
181 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
182
183# elif PGM_SHW_TYPE == PGM_TYPE_PAE
184 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
185
186 PX86PDPAE pPDDst;
187# if PGM_GST_TYPE != PGM_TYPE_PAE
188 X86PDPE PdpeSrc;
189
190 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
191 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
192# endif
193 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
194 if (rc != VINF_SUCCESS)
195 {
196 AssertRC(rc);
197 return rc;
198 }
199 Assert(pPDDst);
200
201# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
202 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
203 PX86PDPAE pPDDst;
204# if PGM_GST_TYPE == PGM_TYPE_PROT
205 /* AMD-V nested paging */
206 X86PML4E Pml4eSrc;
207 X86PDPE PdpeSrc;
208 PX86PML4E pPml4eSrc = &Pml4eSrc;
209
210 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
211 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
212 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
213# endif
214
215 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
216 if (rc != VINF_SUCCESS)
217 {
218 AssertRC(rc);
219 return rc;
220 }
221 Assert(pPDDst);
222
223# elif PGM_SHW_TYPE == PGM_TYPE_EPT
224 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
225 PEPTPD pPDDst;
226
227 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
228 if (rc != VINF_SUCCESS)
229 {
230 AssertRC(rc);
231 return rc;
232 }
233 Assert(pPDDst);
234# endif
235
236# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
237 /* Dirty page handling. */
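 /* A guest page whose D bit is still clear is shadowed by a read-only PTE
  * tagged with PGM_PTFLAGS_TRACK_DIRTY (see SyncPageWorker below); the first
  * guest write therefore traps here so we can set the guest accessed/dirty
  * bits and then make the shadow entry writable. */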
238 if (uErr & X86_TRAP_PF_RW) /* write fault? */
239 {
240 /*
241 * If we successfully correct the write protection fault due to dirty bit
242 * tracking, then return immediately.
243 */
244 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
245 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
246 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
247 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
248 {
249 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
250 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
251 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
252 return VINF_SUCCESS;
253 }
254 }
255
256# if 0 /* rarely useful; leave for debugging. */
257 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
258# endif
259# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
260
261 /*
262 * A common case is the not-present error caused by lazy page table syncing.
263 *
264 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
265 * so we can safely assume that the shadow PT is present when calling SyncPage later.
266 *
267 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
268 * of mapping conflict and defer to SyncCR3 in R3.
269 * (Again, we do NOT support access handlers for non-present guest pages.)
270 *
271 */
272 Assert(PdeSrc.n.u1Present);
273 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
274 && !pPDDst->a[iPDDst].n.u1Present
275 )
276 {
277 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
278 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
279 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
280 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
281 if (RT_SUCCESS(rc))
282 {
283 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
284 return rc;
285 }
286 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
287 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
288 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
289 return VINF_PGM_SYNC_CR3;
290 }
291
292# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
293 /*
294 * Check if this address is within any of our mappings.
295 *
296 * This is *very* fast and it's gonna save us a bit of effort below and prevent
297 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
298 * (BTW, it's impossible to have physical access handlers in a mapping.)
299 */
300 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
301 {
302 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
303 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
304 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
305 {
306 if (pvFault < pMapping->GCPtr)
307 break;
308 if (pvFault - pMapping->GCPtr < pMapping->cb)
309 {
310 /*
311 * The first thing we check is if we've got an undetected conflict.
312 */
313 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
314 {
315 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
316 while (iPT-- > 0)
317 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
318 {
319 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
320 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
321 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
322 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
323 return VINF_PGM_SYNC_CR3;
324 }
325 }
326
327 /*
328 * Check if the fault address is in a virtual page access handler range.
329 */
330 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
331 if ( pCur
332 && pvFault - pCur->Core.Key < pCur->cb
333 && uErr & X86_TRAP_PF_RW)
334 {
335# ifdef IN_RC
336 STAM_PROFILE_START(&pCur->Stat, h);
337 pgmUnlock(pVM);
338 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
339 pgmLock(pVM);
340 STAM_PROFILE_STOP(&pCur->Stat, h);
341# else
342 AssertFailed();
343 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
344# endif
345 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
346 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
347 return rc;
348 }
349
350 /*
351 * Pretend we're not here and let the guest handle the trap.
352 */
353 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
354 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
355 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
356 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
357 return VINF_EM_RAW_GUEST_TRAP;
358 }
359 }
360 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
361 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
362# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
363
364 /*
365 * Check if this fault address is flagged for special treatment,
366 * which means we'll have to figure out the physical address and
367 * check flags associated with it.
368 *
369 * ASSUME that we can limit any special access handling to pages
370 * in page tables which the guest believes to be present.
371 */
372 Assert(PdeSrc.n.u1Present);
373 {
374 RTGCPHYS GCPhys = NIL_RTGCPHYS;
375
376# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
377 if ( PdeSrc.b.u1Size
378# if PGM_GST_TYPE == PGM_TYPE_32BIT
379 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
380# endif
381 )
382 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
383 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
384 else
385 {
386 PGSTPT pPTSrc;
387 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
388 if (RT_SUCCESS(rc))
389 {
390 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
391 if (pPTSrc->a[iPTESrc].n.u1Present)
392 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
393 }
394 }
395# else
396 /* No paging so the fault address is the physical address */
397 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
398# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
399
400 /*
401 * If we have a GC address we'll check if it has any flags set.
402 */
403 if (GCPhys != NIL_RTGCPHYS)
404 {
405 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
406
407 PPGMPAGE pPage;
408 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
409 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
410 {
411 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
412 {
413 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
414 {
415 /*
416 * Physical page access handler.
417 */
418 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
419 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
420 if (pCur)
421 {
422# ifdef PGM_SYNC_N_PAGES
423 /*
424 * If the region is write protected and we got a page not present fault, then sync
425 * the pages. If the fault was caused by a read, then restart the instruction.
426 * In case of write access continue to the GC write handler.
427 *
428 * ASSUMES that there is only one handler per page or that they have similar write properties.
429 */
430 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
431 && !(uErr & X86_TRAP_PF_P))
432 {
433 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
434 if ( RT_FAILURE(rc)
435 || !(uErr & X86_TRAP_PF_RW)
436 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
437 {
438 AssertRC(rc);
439 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
440 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
441 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
442 return rc;
443 }
444 }
445# endif
446
447 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
448 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
449 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
450
451# if defined(IN_RC) || defined(IN_RING0)
452 if (pCur->CTX_SUFF(pfnHandler))
453 {
454 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
455# ifdef IN_RING0
456 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
457# else
458 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
459# endif
460 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
461 void *pvUser = pCur->CTX_SUFF(pvUser);
462
463 STAM_PROFILE_START(&pCur->Stat, h);
464 if (fLeaveLock)
465 pgmUnlock(pVM); /* @todo: Not entirely safe. */
466
467 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
468 if (fLeaveLock)
469 pgmLock(pVM);
470# ifdef VBOX_WITH_STATISTICS
471 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
472 if (pCur)
473 STAM_PROFILE_STOP(&pCur->Stat, h);
474# else
475 pCur = NULL; /* might be invalid by now. */
476# endif
477
478 }
479 else
480# endif
481 rc = VINF_EM_RAW_EMULATE_INSTR;
482
483 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
484 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
485 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
486 return rc;
487 }
488 }
489# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
490 else
491 {
492# ifdef PGM_SYNC_N_PAGES
493 /*
494 * If the region is write protected and we got a page not present fault, then sync
495 * the pages. If the fault was caused by a read, then restart the instruction.
496 * In case of write access continue to the GC write handler.
497 */
498 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
499 && !(uErr & X86_TRAP_PF_P))
500 {
501 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
502 if ( RT_FAILURE(rc)
503 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
504 || !(uErr & X86_TRAP_PF_RW))
505 {
506 AssertRC(rc);
507 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
508 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
509 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
510 return rc;
511 }
512 }
513# endif
514 /*
515 * Ok, it's a virtual page access handler.
516 *
517 * Since it's faster to search by address, we'll do that first
518 * and then retry by GCPhys if that fails.
519 */
520 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
521 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
522 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
523 */
524 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
525 if (pCur)
526 {
527 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
528 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
529 || !(uErr & X86_TRAP_PF_P)
530 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
531 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
532
533 if ( pvFault - pCur->Core.Key < pCur->cb
534 && ( uErr & X86_TRAP_PF_RW
535 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
536 {
537# ifdef IN_RC
538 STAM_PROFILE_START(&pCur->Stat, h);
539 pgmUnlock(pVM);
540 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
541 pgmLock(pVM);
542 STAM_PROFILE_STOP(&pCur->Stat, h);
543# else
544 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
545# endif
546 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
547 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
548 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
549 return rc;
550 }
551 /* Unhandled part of a monitored page */
552 }
553 else
554 {
555 /* Check by physical address. */
556 unsigned iPage;
557 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
558 &pCur, &iPage);
559 Assert(RT_SUCCESS(rc) || !pCur);
560 if ( pCur
561 && ( uErr & X86_TRAP_PF_RW
562 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
563 {
564 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
565# ifdef IN_RC
566 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
567 Assert(off < pCur->cb);
568 STAM_PROFILE_START(&pCur->Stat, h);
569 pgmUnlock(pVM);
570 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
571 pgmLock(pVM);
572 STAM_PROFILE_STOP(&pCur->Stat, h);
573# else
574 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
575# endif
576 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
577 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
578 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
579 return rc;
580 }
581 }
582 }
583# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
584
585 /*
586 * There is a handled area of the page, but this fault doesn't belong to it.
587 * We must emulate the instruction.
588 *
589 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
590 * we first check if this was a page-not-present fault for a page with only
591 * write access handlers. Restart the instruction if it wasn't a write access.
592 */
593 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
594
595 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
596 && !(uErr & X86_TRAP_PF_P))
597 {
598 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
599 if ( RT_FAILURE(rc)
600 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
601 || !(uErr & X86_TRAP_PF_RW))
602 {
603 AssertRC(rc);
604 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
605 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
606 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
607 return rc;
608 }
609 }
610
611 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06.
612 * It's writing to an unhandled part of the LDT page several million times.
613 */
614 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
615 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
616 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
617 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
618 return rc;
619 } /* if any kind of handler */
620
621# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
622 if (uErr & X86_TRAP_PF_P)
623 {
624 /*
625 * The page isn't marked, but it might still be monitored by a virtual page access handler.
626 * (ASSUMES no temporary disabling of virtual handlers.)
627 */
628 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
629 * we should correct both the shadow page table and physical memory flags, and not only check for
630 * accesses within the handler region but for access to pages with virtual handlers. */
631 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
632 if (pCur)
633 {
634 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
635 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
636 || !(uErr & X86_TRAP_PF_P)
637 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
638 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
639
640 if ( pvFault - pCur->Core.Key < pCur->cb
641 && ( uErr & X86_TRAP_PF_RW
642 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
643 {
644# ifdef IN_RC
645 STAM_PROFILE_START(&pCur->Stat, h);
646 pgmUnlock(pVM);
647 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
648 pgmLock(pVM);
649 STAM_PROFILE_STOP(&pCur->Stat, h);
650# else
651 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
652# endif
653 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
654 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
655 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
656 return rc;
657 }
658 }
659 }
660# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
661 }
662 else
663 {
664 /*
665 * When the guest accesses invalid physical memory (e.g. probing
666 * of RAM or accessing a remapped MMIO range), then we'll fall
667 * back to the recompiler to emulate the instruction.
668 */
669 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
670 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
671 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
672 return VINF_EM_RAW_EMULATE_INSTR;
673 }
674
675 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
676
677# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
678 /*
679 * We are here only if page is present in Guest page tables and
680 * trap is not handled by our handlers.
681 *
682 * Check it for page out-of-sync situation.
683 */
684 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
685
686 if (!(uErr & X86_TRAP_PF_P))
687 {
688 /*
689 * Page is not present in our page tables.
690 * Try to sync it!
691 * BTW, fPageShw is invalid in this branch!
692 */
693 if (uErr & X86_TRAP_PF_US)
694 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
695 else /* supervisor */
696 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
697
698# if defined(LOG_ENABLED) && !defined(IN_RING0)
699 RTGCPHYS GCPhys2;
700 uint64_t fPageGst2;
701 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
702 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
703 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
704# endif /* LOG_ENABLED */
705
706# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
707 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
708 {
709 uint64_t fPageGst;
710 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
711 if ( RT_SUCCESS(rc)
712 && !(fPageGst & X86_PTE_US))
713 {
714 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
715 if ( pvFault == (RTGCPTR)pRegFrame->eip
716 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
717# ifdef CSAM_DETECT_NEW_CODE_PAGES
718 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
719 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
720# endif /* CSAM_DETECT_NEW_CODE_PAGES */
721 )
722 {
723 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
724 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
725 if (rc != VINF_SUCCESS)
726 {
727 /*
728 * CSAM needs to perform a job in ring 3.
729 *
730 * Sync the page before going to the host context; otherwise we'll end up in a loop if
731 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
732 */
733 LogFlow(("CSAM ring 3 job\n"));
734 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
735 AssertRC(rc2);
736
737 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
738 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
739 return rc;
740 }
741 }
742# ifdef CSAM_DETECT_NEW_CODE_PAGES
743 else if ( uErr == X86_TRAP_PF_RW
744 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
745 && pRegFrame->ecx < 0x10000)
746 {
747 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
748 * to detect loading of new code pages.
749 */
750
751 /*
752 * Decode the instruction.
753 */
754 RTGCPTR PC;
755 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
756 if (rc == VINF_SUCCESS)
757 {
758 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
759 uint32_t cbOp;
760 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
761
762 /* For now we'll restrict this to rep movsw/d instructions */
763 if ( rc == VINF_SUCCESS
764 && pDis->pCurInstr->opcode == OP_MOVSWD
765 && (pDis->prefix & PREFIX_REP))
766 {
767 CSAMMarkPossibleCodePage(pVM, pvFault);
768 }
769 }
770 }
771# endif /* CSAM_DETECT_NEW_CODE_PAGES */
772
773 /*
774 * Mark this page as safe.
775 */
776 /** @todo not correct for pages that contain both code and data!! */
777 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
778 CSAMMarkPage(pVM, pvFault, true);
779 }
780 }
781# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
782 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
783 if (RT_SUCCESS(rc))
784 {
785 /* The page was successfully synced, return to the guest. */
786 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
787 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
788 return VINF_SUCCESS;
789 }
790 }
791 else /* uErr & X86_TRAP_PF_P: */
792 {
793 /*
794 * Write protected pages are made writable when the guest makes the first
795 * write to them. This happens for pages that are shared, write monitored
796 * and not yet allocated.
797 *
798 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
799 * to physically monitored regions that are no longer valid.
800 * Assume for now it only applies to the read/write flag.
801 */
802 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
803 {
804 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
805 {
806 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
807 GCPhys, pPage, pvFault, uErr));
808 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
809 if (rc != VINF_SUCCESS)
810 {
811 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
812 return rc;
813 }
814 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
815 return VINF_EM_NO_MEMORY;
816 }
817
818# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
819 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
820 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
821 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
822 {
823 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
824 uint64_t fPageGst;
825 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
826 if ( RT_SUCCESS(rc)
827 && !(fPageGst & X86_PTE_RW))
828 {
829 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
830 if (RT_SUCCESS(rc))
831 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
832 else
833 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
834 return rc;
835 }
836 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
837 }
838# endif
839 /// @todo count the above case; else
840 if (uErr & X86_TRAP_PF_US)
841 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
842 else /* supervisor */
843 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
844
845 /*
846 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
847 * page is not present, which is not true in this case.
848 */
849 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
850 if (RT_SUCCESS(rc))
851 {
852 /*
853 * Page was successfully synced, return to guest.
854 * First invalidate the page as it might be in the TLB.
855 */
856# if PGM_SHW_TYPE == PGM_TYPE_EPT
857 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
858# else
859 PGM_INVL_PG(pVCpu, pvFault);
860# endif
861# ifdef VBOX_STRICT
862 RTGCPHYS GCPhys2;
863 uint64_t fPageGst;
864 if (!HWACCMIsNestedPagingActive(pVM))
865 {
866 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
867 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
868 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
869 }
870 uint64_t fPageShw;
871 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
872 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
873# endif /* VBOX_STRICT */
874 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
875 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
876 return VINF_SUCCESS;
877 }
878 }
879
880# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
881# ifdef VBOX_STRICT
882 /*
883 * Check for VMM page flags vs. Guest page flags consistency.
884 * Currently only for debug purposes.
885 */
886 if (RT_SUCCESS(rc))
887 {
888 /* Get guest page flags. */
889 uint64_t fPageGst;
890 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
891 if (RT_SUCCESS(rc))
892 {
893 uint64_t fPageShw;
894 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
895
896 /*
897 * Compare page flags.
898 * Note: we have AVL, A, D bits desynched.
899 */
900 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
901 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
902 }
903 else
904 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
905 }
906 else
907 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
908# endif /* VBOX_STRICT */
909# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
910 }
911 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
912# endif /* PGM_OUT_OF_SYNC_IN_GC */
913 }
914 else /* GCPhys == NIL_RTGCPHYS */
915 {
916 /*
917 * Page not present in Guest OS or invalid page table address.
918 * This is potential virtual page access handler food.
919 *
920 * For the present we'll say that our access handlers don't
921 * work for this case - we've already discarded the page table
922 * not present case which is identical to this.
923 *
924 * When we perchance find we need this, we will probably have AVL
925 * trees (offset based) to operate on and we can measure their speed
926 * against mapping a page table and probably rearrange this handling
927 * a bit. (Like, searching virtual ranges before checking the
928 * physical address.)
929 */
930 }
931 }
932
933# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
934 /*
935 * Conclusion, this is a guest trap.
936 */
937 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
938 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
939 return VINF_EM_RAW_GUEST_TRAP;
940# else
941 /* present, but not a monitored page; perhaps the guest is probing physical memory */
942 return VINF_EM_RAW_EMULATE_INSTR;
943# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
944
945
946# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
947
948 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
949 return VERR_INTERNAL_ERROR;
950# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
951}
952#endif /* !IN_RING3 */
953
954
955/**
956 * Emulation of the invlpg instruction.
957 *
958 *
959 * @returns VBox status code.
960 *
961 * @param pVCpu The VMCPU handle.
962 * @param GCPtrPage Page to invalidate.
963 *
964 * @remark ASSUMES that the guest is updating before invalidating. This order
965 * isn't required by the CPU, so this is speculative and could cause
966 * trouble.
967 * @remark No TLB shootdown is done on any other VCPU as we assume that
968 * invlpg emulation is the *only* reason for calling this function.
969 * (The guest has to shoot down TLB entries on other CPUs itself)
970 * Currently true, but keep in mind!
971 *
972 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
973 */
974PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
975{
976#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
977 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
978 && PGM_SHW_TYPE != PGM_TYPE_EPT
979 int rc;
980 PVM pVM = pVCpu->CTX_SUFF(pVM);
981 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
982
983 Assert(PGMIsLockOwner(pVM));
984
985 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
986
987# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
988 if (pPool->cDirtyPages)
989 pgmPoolResetDirtyPages(pVM);
990# endif
991
992 /*
993 * Get the shadow PD entry and skip out if this PD isn't present.
994 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
995 */
996# if PGM_SHW_TYPE == PGM_TYPE_32BIT
997 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
998 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
999
1000 /* Fetch the pgm pool shadow descriptor. */
1001 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1002 Assert(pShwPde);
1003
1004# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1005 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1006 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1007
1008 /* If the shadow PDPE isn't present, then skip the invalidate. */
1009 if (!pPdptDst->a[iPdpt].n.u1Present)
1010 {
1011 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1012 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1013 return VINF_SUCCESS;
1014 }
1015
1016 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1017 PPGMPOOLPAGE pShwPde = NULL;
1018 PX86PDPAE pPDDst;
1019
1020 /* Fetch the pgm pool shadow descriptor. */
1021 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1022 AssertRCSuccessReturn(rc, rc);
1023 Assert(pShwPde);
1024
1025 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1026 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1027
1028# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1029 /* PML4 */
1030 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1031 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1032 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1033 PX86PDPAE pPDDst;
1034 PX86PDPT pPdptDst;
1035 PX86PML4E pPml4eDst;
1036 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1037 if (rc != VINF_SUCCESS)
1038 {
1039 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1040 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1041 return VINF_SUCCESS;
1042 }
1043 Assert(pPDDst);
1044
1045 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1046 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1047
1048 if (!pPdpeDst->n.u1Present)
1049 {
1050 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1051 return VINF_SUCCESS;
1052 }
1053
1054 /* Fetch the pgm pool shadow descriptor. */
1055 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1056 Assert(pShwPde);
1057
1058# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1059
1060 const SHWPDE PdeDst = *pPdeDst;
1061 if (!PdeDst.n.u1Present)
1062 {
1063 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1064 return VINF_SUCCESS;
1065 }
1066
1067# if defined(IN_RC)
1068 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1069 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1070# endif
1071
1072 /*
1073 * Get the guest PD entry and calc big page.
1074 */
1075# if PGM_GST_TYPE == PGM_TYPE_32BIT
1076 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1077 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1078 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1079# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1080 unsigned iPDSrc = 0;
1081# if PGM_GST_TYPE == PGM_TYPE_PAE
1082 X86PDPE PdpeSrc;
1083 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1084# else /* AMD64 */
1085 PX86PML4E pPml4eSrc;
1086 X86PDPE PdpeSrc;
1087 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1088# endif
1089 GSTPDE PdeSrc;
1090
1091 if (pPDSrc)
1092 PdeSrc = pPDSrc->a[iPDSrc];
1093 else
1094 PdeSrc.u = 0;
1095# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1096
1097# if PGM_GST_TYPE == PGM_TYPE_32BIT
1098 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1099# else
1100 const bool fIsBigPage = PdeSrc.b.u1Size;
1101# endif
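 /* Note: for 32-bit guests the PDE size bit only counts when CR4.PSE is set;
  * PAE and long mode guests always support 2 MB pages. */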
1102
1103# ifdef IN_RING3
1104 /*
1105 * If a CR3 Sync is pending we may ignore the invalidate page operation
1106 * depending on the kind of sync and if it's a global page or not.
1107 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1108 */
1109# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1110 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1111 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1112 && fIsBigPage
1113 && PdeSrc.b.u1Global
1114 )
1115 )
1116# else
1117 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1118# endif
1119 {
1120 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1121 return VINF_SUCCESS;
1122 }
1123# endif /* IN_RING3 */
1124
1125 /*
1126 * Deal with the Guest PDE.
1127 */
1128 rc = VINF_SUCCESS;
1129 if (PdeSrc.n.u1Present)
1130 {
1131 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1132 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1133# ifndef PGM_WITHOUT_MAPPING
1134 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1135 {
1136 /*
1137 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1138 */
1139 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1140 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1141 pgmLock(pVM);
1142 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1143 pgmUnlock(pVM);
1144 }
1145 else
1146# endif /* !PGM_WITHOUT_MAPPING */
1147 if (!fIsBigPage)
1148 {
1149 /*
1150 * 4KB - page.
1151 */
1152 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1153 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1154
1155# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1156 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1157 if (pShwPage->cModifications)
1158 pShwPage->cModifications = 1;
1159# endif
1160
1161# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1162 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1163 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
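 /* (A 32-bit guest page table spans 4 MB while a PAE shadow page table only
  * spans 2 MB, so each guest PT is shadowed by two PAE PTs; bit 0 of the
  * shadow PD index selects which half of the guest PT this one covers.) */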
1164# endif
1165 if (pShwPage->GCPhys == GCPhys)
1166 {
1167# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1168 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1169 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1170 if (pPT->a[iPTEDst].n.u1Present)
1171 {
1172 /* This is very unlikely with caching/monitoring enabled. */
1173 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1174 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1175 }
1176# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1177 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1178 if (RT_SUCCESS(rc))
1179 rc = VINF_SUCCESS;
1180# endif
1181 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1182 PGM_INVL_PG(pVCpu, GCPtrPage);
1183 }
1184 else
1185 {
1186 /*
1187 * The page table address changed.
1188 */
1189 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1190 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1191 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1192 ASMAtomicWriteSize(pPdeDst, 0);
1193 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1194 PGM_INVL_VCPU_TLBS(pVCpu);
1195 }
1196 }
1197 else
1198 {
1199 /*
1200 * 2/4MB - page.
1201 */
1202 /* Before freeing the page, check if anything really changed. */
1203 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1204 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1205# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1206 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1207 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1208# endif
1209 if ( pShwPage->GCPhys == GCPhys
1210 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1211 {
1212 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1213 /** @todo PAT */
1214 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1215 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1216 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1217 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1218 {
1219 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1220 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1221# if defined(IN_RC)
1222 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1223 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1224# endif
1225 return VINF_SUCCESS;
1226 }
1227 }
1228
1229 /*
1230 * Ok, the page table is present and it's been changed in the guest.
1231 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1232 * We could do this for some flushes in GC too, but we need an algorithm for
1233 * deciding which 4MB pages contain code likely to be executed very soon.
1234 */
1235 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1236 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1237 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1238 ASMAtomicWriteSize(pPdeDst, 0);
1239 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1240 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1241 }
1242 }
1243 else
1244 {
1245 /*
1246 * Page directory is not present, mark shadow PDE not present.
1247 */
1248 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1249 {
1250 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1251 ASMAtomicWriteSize(pPdeDst, 0);
1252 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1253 PGM_INVL_PG(pVCpu, GCPtrPage);
1254 }
1255 else
1256 {
1257 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1258 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1259 }
1260 }
1261# if defined(IN_RC)
1262 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1263 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1264# endif
1265 return rc;
1266
1267#else /* guest real and protected mode */
1268 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1269 return VINF_SUCCESS;
1270#endif
1271}
1272
1273
1274/**
1275 * Update the tracking of shadowed pages.
1276 *
1277 * @param pVCpu The VMCPU handle.
1278 * @param pShwPage The shadow page.
1279 * @param HCPhys The physical page which is being dereferenced.
1280 */
1281DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1282{
1283 PVM pVM = pVCpu->CTX_SUFF(pVM);
1284
1285 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1286 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1287
1288 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1289 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1290 * 2. write protect all shadowed pages. I.e. implement caching.
1291 */
1292 /*
1293 * Find the guest address.
1294 */
1295 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1296 pRam;
1297 pRam = pRam->CTX_SUFF(pNext))
1298 {
1299 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1300 while (iPage-- > 0)
1301 {
1302 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1303 {
1304 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1305 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1306 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1307 return;
1308 }
1309 }
1310 }
1311
1312 for (;;)
1313 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1314}
1315
1316
1317/**
1318 * Update the tracking of shadowed pages.
1319 *
1320 * @param pVCpu The VMCPU handle.
1321 * @param pShwPage The shadow page.
1322 * @param u16 The top 16 bits of the pPage->HCPhys.
1323 * @param pPage Pointer to the guest page. This will be modified.
1324 * @param iPTDst The index into the shadow table.
1325 */
1326DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1327{
1328 PVM pVM = pVCpu->CTX_SUFF(pVM);
1329 /*
1330 * Just deal with the simple first time here.
1331 */
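 /* An all-zero tracking word means the page has no shadow references yet,
  * so a single back-reference (cRefs=1 plus this shadow page's pool index)
  * can be stored inline; otherwise pgmPoolTrackPhysExtAddref takes over and
  * tracks the extra references outside the page structure. */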
1332 if (!u16)
1333 {
1334 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1335 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1336 }
1337 else
1338 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1339
1340 /* write back */
1341 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1342 PGM_PAGE_SET_TRACKING(pPage, u16);
1343
1344 /* update statistics. */
1345 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1346 pShwPage->cPresent++;
1347 if (pShwPage->iFirstPresent > iPTDst)
1348 pShwPage->iFirstPresent = iPTDst;
1349}
1350
1351
1352/**
1353 * Creates a 4K shadow page for a guest page.
1354 *
1355 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1356 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1357 * will be mapped in this function.
1358 *
1359 * @param pVCpu The VMCPU handle.
1360 * @param pPteDst Destination page table entry.
1361 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1362 * Can safely assume that only the flags are being used.
1363 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1364 * @param pShwPage Pointer to the shadow page.
1365 * @param iPTDst The index into the shadow table.
1366 *
1367 * @remark Not used for 2/4MB pages!
1368 */
1369DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1370{
1371 if (PteSrc.n.u1Present)
1372 {
1373 PVM pVM = pVCpu->CTX_SUFF(pVM);
1374
1375# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1376 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1377 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1378 if (pShwPage->fDirty)
1379 {
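 /* The shadow page is on the pool's dirty-page list (write monitoring of the
  * guest PT is temporarily relaxed); mirror this guest PTE into the saved
  * copy of the guest page table so that copy stays consistent with what is
  * synced here. */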
1380 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1381 PX86PTPAE pGstPT;
1382
1383 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1384 pGstPT->a[iPTDst].u = PteSrc.u;
1385 }
1386# endif
1387 /*
1388 * Find the ram range.
1389 */
1390 PPGMPAGE pPage;
1391 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1392 if (RT_SUCCESS(rc))
1393 {
1394#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1395 /* Try make the page writable if necessary. */
1396 if ( PteSrc.n.u1Write
1397 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1398# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1399 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1400# endif
1401 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1402 {
1403 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1404 AssertRC(rc);
1405 }
1406#endif
1407
1408 /** @todo investigate PWT, PCD and PAT. */
1409 /*
1410 * Make page table entry.
1411 */
1412 SHWPTE PteDst;
1413 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1414 {
1415 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1416 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1417 {
1418#if PGM_SHW_TYPE == PGM_TYPE_EPT
1419 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1420 PteDst.n.u1Present = 1;
1421 PteDst.n.u1Execute = 1;
1422 PteDst.n.u1IgnorePAT = 1;
1423 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1424 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1425#else
1426 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1427 | PGM_PAGE_GET_HCPHYS(pPage);
1428#endif
1429 }
1430 else
1431 {
1432 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1433 PteDst.u = 0;
1434 }
1435 /** @todo count these two kinds. */
1436 }
1437 else
1438 {
1439#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1440 /*
1441 * If the page or page directory entry is not marked accessed,
1442 * we mark the page not present.
1443 */
1444 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1445 {
1446 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1447 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1448 PteDst.u = 0;
1449 }
1450 else
1451 /*
1452 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1453 * when the page is modified.
1454 */
1455 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1456 {
1457 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1458 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1459 | PGM_PAGE_GET_HCPHYS(pPage)
1460 | PGM_PTFLAGS_TRACK_DIRTY;
1461 }
1462 else
1463#endif
1464 {
1465 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1466#if PGM_SHW_TYPE == PGM_TYPE_EPT
1467 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1468 PteDst.n.u1Present = 1;
1469 PteDst.n.u1Write = 1;
1470 PteDst.n.u1Execute = 1;
1471 PteDst.n.u1IgnorePAT = 1;
1472 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1473 /* PteDst.n.u1Size = 0 */
1474#else
1475 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1476 | PGM_PAGE_GET_HCPHYS(pPage);
1477#endif
1478 }
1479 }
1480
1481 /*
1482 * Make sure only allocated pages are mapped writable.
1483 */
1484 if ( PteDst.n.u1Write
1485 && PteDst.n.u1Present
1486 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1487 {
1488 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1489            Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1490 }
1491
1492 /*
1493 * Keep user track up to date.
1494 */
1495 if (PteDst.n.u1Present)
1496 {
1497 if (!pPteDst->n.u1Present)
1498 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1499 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1500 {
1501 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1502 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1503 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1504 }
1505 }
1506 else if (pPteDst->n.u1Present)
1507 {
1508 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1509 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1510 }
1511
1512 /*
1513 * Update statistics and commit the entry.
1514 */
1515#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1516 if (!PteSrc.n.u1Global)
1517 pShwPage->fSeenNonGlobal = true;
1518#endif
1519 ASMAtomicWriteSize(pPteDst, PteDst.u);
1520 }
1521 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1522 /** @todo count these. */
1523 }
1524 else
1525 {
1526 /*
1527 * Page not-present.
1528 */
1529 Log2(("SyncPageWorker: page not present in Pte\n"));
1530 /* Keep user track up to date. */
1531 if (pPteDst->n.u1Present)
1532 {
1533 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1534 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1535 }
1536 ASMAtomicWriteSize(pPteDst, 0);
1537 /** @todo count these. */
1538 }
1539}
1540
1541
1542/**
1543 * Syncs a guest OS page.
1544 *
1545 * There are no conflicts at this point, neither is there any need for
1546 * page table allocations.
1547 *
1548 * @returns VBox status code.
1549 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1550 * @param pVCpu The VMCPU handle.
1551 * @param PdeSrc Page directory entry of the guest.
1552 * @param GCPtrPage Guest context page address.
1553 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1554 * @param uErr Fault error (X86_TRAP_PF_*).
1555 */
1556PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1557{
1558 PVM pVM = pVCpu->CTX_SUFF(pVM);
1559 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1560 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1561
1562 Assert(PGMIsLockOwner(pVM));
1563
1564#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1565 || PGM_GST_TYPE == PGM_TYPE_PAE \
1566 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1567 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1568 && PGM_SHW_TYPE != PGM_TYPE_EPT
1569
1570 /*
1571 * Assert preconditions.
1572 */
1573 Assert(PdeSrc.n.u1Present);
1574 Assert(cPages);
1575# if 0 /* rarely useful; leave for debugging. */
1576 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1577# endif
1578
1579 /*
1580 * Get the shadow PDE, find the shadow page table in the pool.
1581 */
1582# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1583 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1584 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1585
1586 /* Fetch the pgm pool shadow descriptor. */
1587 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1588 Assert(pShwPde);
1589
1590# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1591 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1592 PPGMPOOLPAGE pShwPde = NULL;
1593 PX86PDPAE pPDDst;
1594
1595 /* Fetch the pgm pool shadow descriptor. */
1596 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1597 AssertRCSuccessReturn(rc2, rc2);
1598 Assert(pShwPde);
1599
1600 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1601 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1602
1603# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1604 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1605 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1606 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1607 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1608
1609 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1610 AssertRCSuccessReturn(rc2, rc2);
1611 Assert(pPDDst && pPdptDst);
1612 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1613# endif
1614 SHWPDE PdeDst = *pPdeDst;
1615
1616 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1617 if (!PdeDst.n.u1Present)
1618 {
1619 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1620 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1621 return VINF_SUCCESS; /* force the instruction to be executed again. */
1622 }
1623
1624 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1625 Assert(pShwPage);
1626
1627# if PGM_GST_TYPE == PGM_TYPE_AMD64
1628 /* Fetch the pgm pool shadow descriptor. */
1629 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1630 Assert(pShwPde);
1631# endif
1632
1633# if defined(IN_RC)
1634 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1635 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1636# endif
1637
1638 /*
1639 * Check that the page is present and that the shadow PDE isn't out of sync.
1640 */
1641# if PGM_GST_TYPE == PGM_TYPE_32BIT
1642 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1643# else
1644 const bool fBigPage = PdeSrc.b.u1Size;
1645# endif
1646 RTGCPHYS GCPhys;
1647 if (!fBigPage)
1648 {
1649 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1650# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1651 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1652 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
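        /* (A 32-bit guest PT holds 1024 4-byte entries while a PAE shadow PT holds 512 8-byte ones,
         *  so each guest PT is backed by two shadow PTs; bit 0 of iPDDst selects which 2 KB half
         *  of the guest PT this shadow PT covers.) */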
1653# endif
1654 }
1655 else
1656 {
1657 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1658# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1659        /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1660 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1661# endif
1662 }
1663 if ( pShwPage->GCPhys == GCPhys
1664 && PdeSrc.n.u1Present
1665 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1666 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1667# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1668 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1669# endif
1670 )
1671 {
1672 /*
1673 * Check that the PDE is marked accessed already.
1674 * Since we set the accessed bit *before* getting here on a #PF, this
1675 * check is only meant for dealing with non-#PF'ing paths.
1676 */
1677 if (PdeSrc.n.u1Accessed)
1678 {
1679 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1680 if (!fBigPage)
1681 {
1682 /*
1683 * 4KB Page - Map the guest page table.
1684 */
1685 PGSTPT pPTSrc;
1686 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1687 if (RT_SUCCESS(rc))
1688 {
1689# ifdef PGM_SYNC_N_PAGES
1690 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1691 if ( cPages > 1
1692 && !(uErr & X86_TRAP_PF_P)
1693 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1694 {
1695 /*
1696 * This code path is currently only taken when the caller is PGMTrap0eHandler
1697 * for non-present pages!
1698 *
1699 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1700 * deal with locality.
1701 */
1702 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1703# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1704 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1705 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1706# else
1707 const unsigned offPTSrc = 0;
1708# endif
1709 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1710 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1711 iPTDst = 0;
1712 else
1713 iPTDst -= PGM_SYNC_NR_PAGES / 2;
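                            /* I.e. sync a window of PGM_SYNC_NR_PAGES entries centred on the faulting
                             * page, clamped to the bounds of the shadow page table. */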
1714 for (; iPTDst < iPTDstEnd; iPTDst++)
1715 {
1716 if (!pPTDst->a[iPTDst].n.u1Present)
1717 {
1718 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1719 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1720 NOREF(GCPtrCurPage);
1721#ifndef IN_RING0
1722 /*
1723 * Assuming kernel code will be marked as supervisor - and not as user level
1724                         * and executed using a conforming code selector - and marked as read-only.
1725 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1726 */
1727 PPGMPAGE pPage;
1728 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1729 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1730 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1731 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1732 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1733 )
1734#endif /* else: CSAM not active */
1735 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1736 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1737 GCPtrCurPage, PteSrc.n.u1Present,
1738 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1739 PteSrc.n.u1User & PdeSrc.n.u1User,
1740 (uint64_t)PteSrc.u,
1741 (uint64_t)pPTDst->a[iPTDst].u,
1742 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1743 }
1744 }
1745 }
1746 else
1747# endif /* PGM_SYNC_N_PAGES */
1748 {
1749 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1750 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1751 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1752 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1753 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1754 GCPtrPage, PteSrc.n.u1Present,
1755 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1756 PteSrc.n.u1User & PdeSrc.n.u1User,
1757 (uint64_t)PteSrc.u,
1758 (uint64_t)pPTDst->a[iPTDst].u,
1759 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1760 }
1761 }
1762 else /* MMIO or invalid page: emulated in #PF handler. */
1763 {
1764 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1765 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1766 }
1767 }
1768 else
1769 {
1770 /*
1771 * 4/2MB page - lazy syncing shadow 4K pages.
1772             * (There are many ways of getting here; it's no longer only CSAM.)
1773 */
1774 /* Calculate the GC physical address of this 4KB shadow page. */
1775 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1776 /* Find ram range. */
1777 PPGMPAGE pPage;
1778 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1779 if (RT_SUCCESS(rc))
1780 {
1781# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1782                    /* Try to make the page writable if necessary. */
1783 if ( PdeSrc.n.u1Write
1784 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1785# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1786 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1787# endif
1788 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1789 {
1790 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1791 AssertRC(rc);
1792 }
1793# endif
1794
1795 /*
1796 * Make shadow PTE entry.
1797 */
1798 SHWPTE PteDst;
1799 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1800 | PGM_PAGE_GET_HCPHYS(pPage);
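                    /* The single 4KB shadow PTE inherits its flags from the guest big-page PDE; only the
                     * host physical address of this particular 4KB page differs. */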
1801 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1802 {
1803 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1804 PteDst.n.u1Write = 0;
1805 else
1806 PteDst.u = 0;
1807 }
1808 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1809 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1810 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1811
1812 /* Make sure only allocated pages are mapped writable. */
1813 if ( PteDst.n.u1Write
1814 && PteDst.n.u1Present
1815 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1816 {
1817 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1818 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1819 }
1820
1821 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1822
1823 /*
1824 * If the page is not flagged as dirty and is writable, then make it read-only
1825 * at PD level, so we can set the dirty bit when the page is modified.
1826 *
1827 * ASSUMES that page access handlers are implemented on page table entry level.
1828 * Thus we will first catch the dirty access and set PDE.D and restart. If
1829 * there is an access handler, we'll trap again and let it work on the problem.
1830 */
1831 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1832 * As for invlpg, it simply frees the whole shadow PT.
1833 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1834 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1835 {
1836 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1837 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1838 PdeDst.n.u1Write = 0;
1839 }
1840 else
1841 {
1842 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1843 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1844 }
1845 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1846 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1847 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1848 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1849 }
1850 else
1851 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1852 }
1853# if defined(IN_RC)
1854    /* We're done with the dynamic pPdeDst mapping; release it again. */
1855 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1856# endif
1857 return VINF_SUCCESS;
1858 }
1859 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1860 }
1861 else
1862 {
1863 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1864 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1865 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1866 }
1867
1868 /*
1869 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1870 * Yea, I'm lazy.
1871 */
1872 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1873 ASMAtomicWriteSize(pPdeDst, 0);
1874
1875# if defined(IN_RC)
1876    /* We're done with the dynamic pPdeDst mapping; release it again. */
1877 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1878# endif
1879 PGM_INVL_VCPU_TLBS(pVCpu);
1880 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1881
1882#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1883 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1884 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1885 && !defined(IN_RC)
1886
1887# ifdef PGM_SYNC_N_PAGES
1888 /*
1889 * Get the shadow PDE, find the shadow page table in the pool.
1890 */
1891# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1892 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1893
1894# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1895 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1896
1897# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1898 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1899 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1900 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1901 X86PDEPAE PdeDst;
1902 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1903
1904 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1905 AssertRCSuccessReturn(rc, rc);
1906 Assert(pPDDst && pPdptDst);
1907 PdeDst = pPDDst->a[iPDDst];
1908# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1909 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1910 PEPTPD pPDDst;
1911 EPTPDE PdeDst;
1912
1913 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1914 if (rc != VINF_SUCCESS)
1915 {
1916 AssertRC(rc);
1917 return rc;
1918 }
1919 Assert(pPDDst);
1920 PdeDst = pPDDst->a[iPDDst];
1921# endif
1922 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1923 if (!PdeDst.n.u1Present)
1924 {
1925 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
1926 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1927 return VINF_SUCCESS; /* force the instruction to be executed again. */
1928 }
1929
1930 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
1931 if (PdeDst.n.u1Size)
1932 {
1933 Assert(HWACCMIsNestedPagingActive(pVM));
1934 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
1935 return VINF_SUCCESS;
1936 }
1937
1938 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1939 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1940
1941 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1942 if ( cPages > 1
1943 && !(uErr & X86_TRAP_PF_P)
1944 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1945 {
1946 /*
1947 * This code path is currently only taken when the caller is PGMTrap0eHandler
1948 * for non-present pages!
1949 *
1950 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1951 * deal with locality.
1952 */
1953 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1954 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1955 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1956 iPTDst = 0;
1957 else
1958 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1959 for (; iPTDst < iPTDstEnd; iPTDst++)
1960 {
1961 if (!pPTDst->a[iPTDst].n.u1Present)
1962 {
1963 GSTPTE PteSrc;
1964
1965 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1966
1967                    /* Fake the page table entry: without guest paging the mapping is 1:1, so synthesize an identity PTE with full access. */
1968 PteSrc.u = GCPtrCurPage;
1969 PteSrc.n.u1Present = 1;
1970 PteSrc.n.u1Dirty = 1;
1971 PteSrc.n.u1Accessed = 1;
1972 PteSrc.n.u1Write = 1;
1973 PteSrc.n.u1User = 1;
1974
1975 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1976
1977 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1978 GCPtrCurPage, PteSrc.n.u1Present,
1979 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1980 PteSrc.n.u1User & PdeSrc.n.u1User,
1981 (uint64_t)PteSrc.u,
1982 (uint64_t)pPTDst->a[iPTDst].u,
1983 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1984
1985 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
1986 break;
1987 }
1988 else
1989 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
1990 }
1991 }
1992 else
1993# endif /* PGM_SYNC_N_PAGES */
1994 {
1995 GSTPTE PteSrc;
1996 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1997 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1998
1999        /* Fake the page table entry: without guest paging the mapping is 1:1, so synthesize an identity PTE with full access. */
2000 PteSrc.u = GCPtrCurPage;
2001 PteSrc.n.u1Present = 1;
2002 PteSrc.n.u1Dirty = 1;
2003 PteSrc.n.u1Accessed = 1;
2004 PteSrc.n.u1Write = 1;
2005 PteSrc.n.u1User = 1;
2006 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2007
2008        Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2009 GCPtrPage, PteSrc.n.u1Present,
2010 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2011 PteSrc.n.u1User & PdeSrc.n.u1User,
2012 (uint64_t)PteSrc.u,
2013 (uint64_t)pPTDst->a[iPTDst].u,
2014 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2015 }
2016 return VINF_SUCCESS;
2017
2018#else
2019    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2020 return VERR_INTERNAL_ERROR;
2021#endif
2022}
2023
2024
2025#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2026/**
2027 * Investigate page fault and handle write protection page faults caused by
2028 * dirty bit tracking.
2029 *
2030 * @returns VBox status code.
2031 * @param pVCpu The VMCPU handle.
2032 * @param uErr Page fault error code.
2033 * @param pPdeSrc Guest page directory entry.
2034 * @param GCPtrPage Guest context page address.
2035 */
2036PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2037{
2038 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2039 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2040 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
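    /* A supervisor-mode write only honours the R/W bit when CR0.WP is set; user-mode writes always do. */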
2041# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2042 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
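    /* An instruction fetch can only fault on the NX bit when EFER.NXE is enabled. */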
2043# endif
2044 unsigned uPageFaultLevel;
2045 int rc;
2046 PVM pVM = pVCpu->CTX_SUFF(pVM);
2047
2048 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2049
2050# if PGM_GST_TYPE == PGM_TYPE_PAE \
2051 || PGM_GST_TYPE == PGM_TYPE_AMD64
2052
2053# if PGM_GST_TYPE == PGM_TYPE_AMD64
2054 PX86PML4E pPml4eSrc;
2055 PX86PDPE pPdpeSrc;
2056
2057 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2058 Assert(pPml4eSrc);
2059
2060 /*
2061 * Real page fault? (PML4E level)
2062 */
2063 if ( (uErr & X86_TRAP_PF_RSVD)
2064 || !pPml4eSrc->n.u1Present
2065 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2066 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2067 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2068 )
2069 {
2070 uPageFaultLevel = 0;
2071 goto l_UpperLevelPageFault;
2072 }
2073 Assert(pPdpeSrc);
2074
2075# else /* PAE */
2076 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2077# endif /* PAE */
2078
2079 /*
2080 * Real page fault? (PDPE level)
2081 */
2082 if ( (uErr & X86_TRAP_PF_RSVD)
2083 || !pPdpeSrc->n.u1Present
2084# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2085 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2086 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2087 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2088# endif
2089 )
2090 {
2091 uPageFaultLevel = 1;
2092 goto l_UpperLevelPageFault;
2093 }
2094# endif
2095
2096 /*
2097 * Real page fault? (PDE level)
2098 */
2099 if ( (uErr & X86_TRAP_PF_RSVD)
2100 || !pPdeSrc->n.u1Present
2101 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2102# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2103 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2104# endif
2105 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2106 {
2107 uPageFaultLevel = 2;
2108 goto l_UpperLevelPageFault;
2109 }
2110
2111 /*
2112 * First check the easy case where the page directory has been marked read-only to track
2113 * the dirty bit of an emulated BIG page
2114 */
2115 if ( pPdeSrc->b.u1Size
2116# if PGM_GST_TYPE == PGM_TYPE_32BIT
2117 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2118# endif
2119 )
2120 {
2121 /* Mark guest page directory as accessed */
2122# if PGM_GST_TYPE == PGM_TYPE_AMD64
2123 pPml4eSrc->n.u1Accessed = 1;
2124 pPdpeSrc->lm.u1Accessed = 1;
2125# endif
2126 pPdeSrc->b.u1Accessed = 1;
2127
2128 /*
2129 * Only write protection page faults are relevant here.
2130 */
2131 if (fWriteFault)
2132 {
2133 /* Mark guest page directory as dirty (BIG page only). */
2134 pPdeSrc->b.u1Dirty = 1;
2135 }
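        /* Only the guest A/D bits are updated here; the write-protected shadow PDE (PGM_PDFLAGS_TRACK_DIRTY)
         * is fixed up by CheckDirtyPageFault. */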
2136 return VINF_SUCCESS;
2137 }
2138 /* else: 4KB page table */
2139
2140 /*
2141 * Map the guest page table.
2142 */
2143 PGSTPT pPTSrc;
2144 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2145 if (RT_SUCCESS(rc))
2146 {
2147 /*
2148 * Real page fault?
2149 */
2150 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2151 const GSTPTE PteSrc = *pPteSrc;
2152 if ( !PteSrc.n.u1Present
2153 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2154# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2155 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2156# endif
2157 || (fUserLevelFault && !PteSrc.n.u1User)
2158 )
2159 {
2160 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2161 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2162
2163 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2164 * See the 2nd case above as well.
2165 */
2166 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2167 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2168
2169 return VINF_EM_RAW_GUEST_TRAP;
2170 }
2171 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2172
2173 /*
2174 * Set the accessed bits in the page directory and the page table.
2175 */
2176# if PGM_GST_TYPE == PGM_TYPE_AMD64
2177 pPml4eSrc->n.u1Accessed = 1;
2178 pPdpeSrc->lm.u1Accessed = 1;
2179# endif
2180 pPdeSrc->n.u1Accessed = 1;
2181 pPteSrc->n.u1Accessed = 1;
2182
2183 /*
2184 * Only write protection page faults are relevant here.
2185 */
2186 if (fWriteFault)
2187 {
2188 /* Write access, so mark guest entry as dirty. */
2189# ifdef VBOX_WITH_STATISTICS
2190 if (!pPteSrc->n.u1Dirty)
2191 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2192 else
2193 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2194# endif
2195
2196 pPteSrc->n.u1Dirty = 1;
2197 }
2198 return VINF_SUCCESS;
2199 }
2200 AssertRC(rc);
2201 return rc;
2202
2203
2204l_UpperLevelPageFault:
2205 /*
2206 * Pagefault detected while checking the PML4E, PDPE or PDE.
2207 * Single exit handler to get rid of duplicate code paths.
2208 */
2209 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2210 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2211
2212 if ( 1
2213# if PGM_GST_TYPE == PGM_TYPE_AMD64
2214 && pPml4eSrc->n.u1Present
2215# endif
2216# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2217 && pPdpeSrc->n.u1Present
2218# endif
2219 && pPdeSrc->n.u1Present)
2220 {
2221 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2222 if ( pPdeSrc->b.u1Size
2223# if PGM_GST_TYPE == PGM_TYPE_32BIT
2224 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2225# endif
2226 )
2227 {
2228 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2229 }
2230 else
2231 {
2232 /*
2233 * Map the guest page table.
2234 */
2235 PGSTPT pPTSrc2;
2236 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2237 if (RT_SUCCESS(rc))
2238 {
2239 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2240 if (pPteSrc->n.u1Present)
2241 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2242 }
2243 AssertRC(rc);
2244 }
2245 }
2246 return VINF_EM_RAW_GUEST_TRAP;
2247}
2248
2249/**
2250 * Handle dirty bit tracking faults.
2251 *
2252 * @returns VBox status code.
2253 * @param pVCpu The VMCPU handle.
2254 * @param uErr Page fault error code.
2255 * @param pPdeSrc Guest page directory entry.
2256 * @param pPdeDst Shadow page directory entry.
2257 * @param GCPtrPage Guest context page address.
2258 */
2259PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2260{
2261# if PGM_GST_TYPE == PGM_TYPE_32BIT
2262 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2263# else
2264 const bool fBigPagesSupported = true;
2265# endif
2266 PVM pVM = pVCpu->CTX_SUFF(pVM);
2267 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2268
2269 Assert(PGMIsLockOwner(pVM));
2270
2271 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2272 {
2273 if ( pPdeDst->n.u1Present
2274 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2275 {
2276 SHWPDE PdeDst = *pPdeDst;
2277
2278 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2279 Assert(pPdeSrc->b.u1Write);
2280
2281 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2282 * fault again and take this path to only invalidate the entry.
2283 */
2284 PdeDst.n.u1Write = 1;
2285 PdeDst.n.u1Accessed = 1;
2286 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2287 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2288 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2289 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2290 }
2291# ifdef IN_RING0
2292 else
2293 /* Check for stale TLB entry; only applies to the SMP guest case. */
2294 if ( pVM->cCpus > 1
2295 && pPdeDst->n.u1Write
2296 && pPdeDst->n.u1Accessed)
2297 {
2298 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2299 if (pShwPage)
2300 {
2301 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2302 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2303 if ( pPteDst->n.u1Present
2304 && pPteDst->n.u1Write)
2305 {
2306 /* Stale TLB entry. */
2307 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2308 PGM_INVL_PG(pVCpu, GCPtrPage);
2309 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2310 }
2311 }
2312 }
2313# endif /* IN_RING0 */
2314 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2315 }
2316
2317 /*
2318 * Map the guest page table.
2319 */
2320 PGSTPT pPTSrc;
2321 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2322 if (RT_SUCCESS(rc))
2323 {
2324 if (pPdeDst->n.u1Present)
2325 {
2326 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2327 const GSTPTE PteSrc = *pPteSrc;
2328#ifndef IN_RING0
2329 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2330 * Our individual shadow handlers will provide more information and force a fatal exit.
2331 */
2332 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2333 {
2334 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2335 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2336 }
2337#endif
2338 /*
2339 * Map shadow page table.
2340 */
2341 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2342 if (pShwPage)
2343 {
2344 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2345 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2346 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2347 {
2348 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2349 {
2350 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2351 SHWPTE PteDst = *pPteDst;
2352
2353 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2354 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2355
2356 Assert(pPteSrc->n.u1Write);
2357
2358 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2359 * fault again and take this path to only invalidate the entry.
2360 */
2361 if (RT_LIKELY(pPage))
2362 {
2363 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2364 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2365 PteDst.n.u1Write = 0;
2366 else
2367 {
2368 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2369 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2370 {
2371 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2372 AssertRC(rc);
2373 }
2374 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2375 PteDst.n.u1Write = 1;
2376 else
2377 PteDst.n.u1Write = 0;
2378 }
2379 }
2380 else
2381 PteDst.n.u1Write = 1;
2382
2383 PteDst.n.u1Dirty = 1;
2384 PteDst.n.u1Accessed = 1;
2385 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2386 ASMAtomicWriteSize(pPteDst, PteDst.u);
2387 PGM_INVL_PG(pVCpu, GCPtrPage);
2388 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2389 }
2390# ifdef IN_RING0
2391 else
2392 /* Check for stale TLB entry; only applies to the SMP guest case. */
2393 if ( pVM->cCpus > 1
2394 && pPteDst->n.u1Write == 1
2395 && pPteDst->n.u1Accessed == 1)
2396 {
2397 /* Stale TLB entry. */
2398 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2399 PGM_INVL_PG(pVCpu, GCPtrPage);
2400 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2401 }
2402# endif
2403 }
2404 }
2405 else
2406 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2407 }
2408 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2409 }
2410 AssertRC(rc);
2411 return rc;
2412}
2413#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2414
2415
2416/**
2417 * Sync a shadow page table.
2418 *
2419 * The shadow page table is not present. This includes the case where
2420 * there is a conflict with a mapping.
2421 *
2422 * @returns VBox status code.
2423 * @param pVCpu The VMCPU handle.
2424 * @param   iPDSrc      Page directory index.
2425 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2426 * Assume this is a temporary mapping.
2427 * @param GCPtrPage GC Pointer of the page that caused the fault
2428 */
2429PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2430{
2431 PVM pVM = pVCpu->CTX_SUFF(pVM);
2432 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2433
2434 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2435#if 0 /* rarely useful; leave for debugging. */
2436 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2437#endif
2438 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2439
2440 Assert(PGMIsLocked(pVM));
2441
2442#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2443 || PGM_GST_TYPE == PGM_TYPE_PAE \
2444 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2445 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2446 && PGM_SHW_TYPE != PGM_TYPE_EPT
2447
2448 int rc = VINF_SUCCESS;
2449
2450 /*
2451 * Validate input a little bit.
2452 */
2453 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2454# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2455 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2456 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2457
2458 /* Fetch the pgm pool shadow descriptor. */
2459 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2460 Assert(pShwPde);
2461
2462# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2463 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2464 PPGMPOOLPAGE pShwPde = NULL;
2465 PX86PDPAE pPDDst;
2466 PSHWPDE pPdeDst;
2467
2468 /* Fetch the pgm pool shadow descriptor. */
2469 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2470 AssertRCSuccessReturn(rc, rc);
2471 Assert(pShwPde);
2472
2473 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2474 pPdeDst = &pPDDst->a[iPDDst];
2475
2476# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2477 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2478 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2479 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2480 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2481 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2482 AssertRCSuccessReturn(rc, rc);
2483 Assert(pPDDst);
2484 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2485# endif
2486 SHWPDE PdeDst = *pPdeDst;
2487
2488# if PGM_GST_TYPE == PGM_TYPE_AMD64
2489 /* Fetch the pgm pool shadow descriptor. */
2490 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2491 Assert(pShwPde);
2492# endif
2493
2494# ifndef PGM_WITHOUT_MAPPINGS
2495 /*
2496 * Check for conflicts.
2497 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2498 * HC: Simply resolve the conflict.
2499 */
2500 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2501 {
2502 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2503# ifndef IN_RING3
2504 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2505 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2506 return VERR_ADDRESS_CONFLICT;
2507# else
2508 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2509 Assert(pMapping);
2510# if PGM_GST_TYPE == PGM_TYPE_32BIT
2511 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2512# elif PGM_GST_TYPE == PGM_TYPE_PAE
2513 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2514# else
2515 AssertFailed(); /* can't happen for amd64 */
2516# endif
2517 if (RT_FAILURE(rc))
2518 {
2519 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2520 return rc;
2521 }
2522 PdeDst = *pPdeDst;
2523# endif
2524 }
2525# endif /* !PGM_WITHOUT_MAPPINGS */
2526    Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts. */
2527
2528# if defined(IN_RC)
2529 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2530 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2531# endif
2532
2533 /*
2534 * Sync page directory entry.
2535 */
2536 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2537 if (PdeSrc.n.u1Present)
2538 {
2539 /*
2540 * Allocate & map the page table.
2541 */
2542 PSHWPT pPTDst;
2543# if PGM_GST_TYPE == PGM_TYPE_32BIT
2544 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2545# else
2546 const bool fPageTable = !PdeSrc.b.u1Size;
2547# endif
2548 PPGMPOOLPAGE pShwPage;
2549 RTGCPHYS GCPhys;
2550 if (fPageTable)
2551 {
2552 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2553# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2554 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2555 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2556# endif
2557 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2558 }
2559 else
2560 {
2561 PGMPOOLACCESS enmAccess;
2562# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2563 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2564# else
2565 const bool fNoExecute = false;
2566# endif
2567
2568 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2569# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2570            /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2571 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2572# endif
2573 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2574 if (PdeSrc.n.u1User)
2575 {
2576 if (PdeSrc.n.u1Write)
2577 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2578 else
2579 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2580 }
2581 else
2582 {
2583 if (PdeSrc.n.u1Write)
2584 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2585 else
2586 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2587 }
2588 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2589 }
2590 if (rc == VINF_SUCCESS)
2591 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2592 else if (rc == VINF_PGM_CACHED_PAGE)
2593 {
2594 /*
2595 * The PT was cached, just hook it up.
2596 */
2597 if (fPageTable)
2598 PdeDst.u = pShwPage->Core.Key
2599 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2600 else
2601 {
2602 PdeDst.u = pShwPage->Core.Key
2603 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2604 /* (see explanation and assumptions further down.) */
2605 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2606 {
2607 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2608 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2609 PdeDst.b.u1Write = 0;
2610 }
2611 }
2612 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2613# if defined(IN_RC)
2614 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2615# endif
2616 return VINF_SUCCESS;
2617 }
2618 else if (rc == VERR_PGM_POOL_FLUSHED)
2619 {
2620 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2621# if defined(IN_RC)
2622 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2623# endif
2624 return VINF_PGM_SYNC_CR3;
2625 }
2626 else
2627 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2628 PdeDst.u &= X86_PDE_AVL_MASK;
2629 PdeDst.u |= pShwPage->Core.Key;
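    /* (Core.Key is the host physical address of the freshly allocated shadow page table.) */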
2630
2631 /*
2632 * Page directory has been accessed (this is a fault situation, remember).
2633 */
2634 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2635 if (fPageTable)
2636 {
2637 /*
2638 * Page table - 4KB.
2639 *
2640 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2641 */
2642 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2643 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2644 PGSTPT pPTSrc;
2645 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2646 if (RT_SUCCESS(rc))
2647 {
2648 /*
2649 * Start by syncing the page directory entry so CSAM's TLB trick works.
2650 */
2651 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2652 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2653 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2654# if defined(IN_RC)
2655 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2656# endif
2657
2658 /*
2659 * Directory/page user or supervisor privilege: (same goes for read/write)
2660 *
2661 * Directory Page Combined
2662 * U/S U/S U/S
2663 * 0 0 0
2664 * 0 1 0
2665 * 1 0 0
2666 * 1 1 1
2667 *
2668 * Simple AND operation. Table listed for completeness.
2669 *
2670 */
2671 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2672# ifdef PGM_SYNC_N_PAGES
2673 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2674 unsigned iPTDst = iPTBase;
2675 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2676 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2677 iPTDst = 0;
2678 else
2679 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2680# else /* !PGM_SYNC_N_PAGES */
2681 unsigned iPTDst = 0;
2682 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2683# endif /* !PGM_SYNC_N_PAGES */
2684# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2685 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2686 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2687# else
2688 const unsigned offPTSrc = 0;
2689# endif
2690 for (; iPTDst < iPTDstEnd; iPTDst++)
2691 {
2692 const unsigned iPTSrc = iPTDst + offPTSrc;
2693 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2694
2695 if (PteSrc.n.u1Present) /* we've already cleared it above */
2696 {
2697# ifndef IN_RING0
2698 /*
2699 * Assuming kernel code will be marked as supervisor - and not as user level
2700                 * and executed using a conforming code selector - and marked as read-only.
2701 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2702 */
2703 PPGMPAGE pPage;
2704 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2705 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2706 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2707 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2708 )
2709# endif
2710 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2711 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2712 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2713 PteSrc.n.u1Present,
2714 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2715 PteSrc.n.u1User & PdeSrc.n.u1User,
2716 (uint64_t)PteSrc.u,
2717 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2718 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2719 }
2720 } /* for PTEs */
2721 }
2722 }
2723 else
2724 {
2725 /*
2726 * Big page - 2/4MB.
2727 *
2728 * We'll walk the ram range list in parallel and optimize lookups.
2729             * We will only sync one shadow page table at a time.
2730 */
2731 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2732
2733            /** @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs). */
2736
2737 /*
2738 * Start by syncing the page directory entry.
2739 */
2740 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2741 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2742
2743 /*
2744 * If the page is not flagged as dirty and is writable, then make it read-only
2745 * at PD level, so we can set the dirty bit when the page is modified.
2746 *
2747 * ASSUMES that page access handlers are implemented on page table entry level.
2748 * Thus we will first catch the dirty access and set PDE.D and restart. If
2749 * there is an access handler, we'll trap again and let it work on the problem.
2750 */
2751 /** @todo move the above stuff to a section in the PGM documentation. */
2752 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2753 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2754 {
2755 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2756 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2757 PdeDst.b.u1Write = 0;
2758 }
2759 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2760# if defined(IN_RC)
2761 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2762# endif
2763
2764 /*
2765 * Fill the shadow page table.
2766 */
2767 /* Get address and flags from the source PDE. */
2768 SHWPTE PteDstBase;
2769 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
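            /* Every 4KB shadow PTE below inherits these flags from the big-page PDE; only the physical
             * address changes as we advance through the range page by page. */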
2770
2771 /* Loop thru the entries in the shadow PT. */
2772 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2773 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2774 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2775 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2776 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2777 unsigned iPTDst = 0;
2778 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2779 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2780 {
2781 /* Advance ram range list. */
2782 while (pRam && GCPhys > pRam->GCPhysLast)
2783 pRam = pRam->CTX_SUFF(pNext);
2784 if (pRam && GCPhys >= pRam->GCPhys)
2785 {
2786 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2787 do
2788 {
2789 /* Make shadow PTE. */
2790 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2791 SHWPTE PteDst;
2792
2793# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2794                    /* Try to make the page writable if necessary. */
2795 if ( PteDstBase.n.u1Write
2796 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2797# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2798 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2799# endif
2800 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2801 {
2802 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2803 AssertRCReturn(rc, rc);
2804 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2805 break;
2806 }
2807# endif
2808
2809 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2810 {
2811 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2812 {
2813 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2814 PteDst.n.u1Write = 0;
2815 }
2816 else
2817 PteDst.u = 0;
2818 }
2819# ifndef IN_RING0
2820 /*
2821 * Assuming kernel code will be marked as supervisor and not as user level and executed
2822 * using a conforming code selector. Don't check for readonly, as that implies the whole
2823 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2824 */
2825 else if ( !PdeSrc.n.u1User
2826 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2827 PteDst.u = 0;
2828# endif
2829 else
2830 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2831
2832 /* Only map writable pages writable. */
2833 if ( PteDst.n.u1Write
2834 && PteDst.n.u1Present
2835 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2836 {
2837 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2838 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2839 }
2840
2841 if (PteDst.n.u1Present)
2842 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2843
2844 /* commit it */
2845 pPTDst->a[iPTDst] = PteDst;
2846 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2847 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2848 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2849
2850 /* advance */
2851 GCPhys += PAGE_SIZE;
2852 iHCPage++;
2853 iPTDst++;
2854 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2855 && GCPhys <= pRam->GCPhysLast);
2856 }
2857 else if (pRam)
2858 {
2859 Log(("Invalid pages at %RGp\n", GCPhys));
2860 do
2861 {
2862 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2863 GCPhys += PAGE_SIZE;
2864 iPTDst++;
2865 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2866 && GCPhys < pRam->GCPhys);
2867 }
2868 else
2869 {
2870 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2871 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2872 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2873 }
2874 } /* while more PTEs */
2875 } /* 4KB / 4MB */
2876 }
2877 else
2878 AssertRelease(!PdeDst.n.u1Present);
2879
2880 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2881 if (RT_FAILURE(rc))
2882 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2883 return rc;
2884
2885#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2886 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2887 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2888 && !defined(IN_RC)
2889
2890 /*
2891 * Validate input a little bit.
2892 */
2893 int rc = VINF_SUCCESS;
2894# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2895 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2896 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2897
2898 /* Fetch the pgm pool shadow descriptor. */
2899 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2900 Assert(pShwPde);
2901
2902# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2903 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2904 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2905 PX86PDPAE pPDDst;
2906 PSHWPDE pPdeDst;
2907
2908 /* Fetch the pgm pool shadow descriptor. */
2909 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2910 AssertRCSuccessReturn(rc, rc);
2911 Assert(pShwPde);
2912
2913 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2914 pPdeDst = &pPDDst->a[iPDDst];
2915
2916# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2917 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2918 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2919 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2920    PX86PDPT pPdptDst = NULL;            /* initialized to shut up gcc */
2921 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2922 AssertRCSuccessReturn(rc, rc);
2923 Assert(pPDDst);
2924 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2925
2926 /* Fetch the pgm pool shadow descriptor. */
2927 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2928 Assert(pShwPde);
2929
2930# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2931 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2932 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2933 PEPTPD pPDDst;
2934 PEPTPDPT pPdptDst;
2935
2936 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2937 if (rc != VINF_SUCCESS)
2938 {
2939 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2940 AssertRC(rc);
2941 return rc;
2942 }
2943 Assert(pPDDst);
2944 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2945
2946 /* Fetch the pgm pool shadow descriptor. */
2947 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2948 Assert(pShwPde);
2949# endif
2950 SHWPDE PdeDst = *pPdeDst;
2951
2952 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2953    Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts. */
2954
2955# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
2956# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
2957 if (HWACCMIsNestedPagingActive(pVM))
2958# endif
2959 {
2960 PPGMPAGE pPage;
2961
2962 /* Check if we allocated a big page before for this 2 MB range. */
2963 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
2964 if (RT_SUCCESS(rc))
2965 {
2966 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2967
2968 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
2969 {
2970 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
2971 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2972 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2973 }
2974 else
2975 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
2976 {
2977 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
2978 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
2979 if (RT_SUCCESS(rc))
2980 {
2981 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2982 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2983 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2984 }
2985 }
2986 else
2987 if (PGMIsUsingLargePages(pVM))
2988 {
2989 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
2990 if (RT_SUCCESS(rc))
2991 {
2992 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2993 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2994 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2995 }
2996 else
2997 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
2998 }
2999
3000 if (HCPhys != NIL_RTHCPHYS)
3001 {
3002 PdeDst.u &= X86_PDE_AVL_MASK;
3003 PdeDst.u |= HCPhys;
3004 PdeDst.n.u1Present = 1;
3005 PdeDst.n.u1Write = 1;
3006 PdeDst.b.u1Size = 1;
3007# if PGM_SHW_TYPE == PGM_TYPE_EPT
3008 PdeDst.n.u1Execute = 1;
3009 PdeDst.b.u1IgnorePAT = 1;
3010 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3011# else
3012 PdeDst.n.u1User = 1;
3013# endif
3014 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3015
3016 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3017 /* Add a reference to the first page only. */
3018 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3019
3020 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3021 return VINF_SUCCESS;
3022 }
3023 }
3024 }
3025# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3026
3027 GSTPDE PdeSrc;
3028 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3029 PdeSrc.n.u1Present = 1;
3030 PdeSrc.n.u1Write = 1;
3031 PdeSrc.n.u1Accessed = 1;
3032 PdeSrc.n.u1User = 1;
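    /* SyncPage below treats the guest as identity mapped with full access (it fakes the guest PTEs the same way). */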
3033
3034 /*
3035 * Allocate & map the page table.
3036 */
3037 PSHWPT pPTDst;
3038 PPGMPOOLPAGE pShwPage;
3039 RTGCPHYS GCPhys;
3040
3041 /* Virtual address = physical address */
3042 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3043 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
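    /* The pool page's GCPhys is the start of the 2/4 MB region this page table maps (aligned down above). */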
3044
3045 if ( rc == VINF_SUCCESS
3046 || rc == VINF_PGM_CACHED_PAGE)
3047 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3048 else
3049 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3050
3051 PdeDst.u &= X86_PDE_AVL_MASK;
3052 PdeDst.u |= pShwPage->Core.Key;
3053 PdeDst.n.u1Present = 1;
3054 PdeDst.n.u1Write = 1;
3055# if PGM_SHW_TYPE == PGM_TYPE_EPT
3056 PdeDst.n.u1Execute = 1;
3057# else
3058 PdeDst.n.u1User = 1;
3059 PdeDst.n.u1Accessed = 1;
3060# endif
3061 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3062
3063 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3064 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3065 return rc;
3066
3067#else
3068 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3069 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3070 return VERR_INTERNAL_ERROR;
3071#endif
3072}
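/*
 * Illustrative sketch only, never compiled: the pool key passed to pgmPoolAlloc in the
 * unpaged-guest path above is the guest physical base of the region one shadow page
 * table covers (virtual == physical there). The helper name is hypothetical.
 */
#if 0
DECLINLINE(RTGCPHYS) pgmBthExampleShwPTPoolKey(RTGCPTR GCPtrPage)
{
    RTGCPHYS GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;        /* identity mapping */
    return GCPhys & ~(RTGCPHYS)(RT_BIT_64(SHW_PD_SHIFT) - 1);   /* align to the PT coverage */
}
#endif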
3073
3074
3075
3076/**
3077 * Prefetch a page/set of pages.
3078 *
3079 * Typically used to sync commonly used pages before entering raw mode
3080 * after a CR3 reload.
3081 *
3082 * @returns VBox status code.
3083 * @param pVCpu The VMCPU handle.
3084 * @param GCPtrPage Page to prefetch.
3085 */
3086PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3087{
3088#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3089 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3090 /*
3091 * Check that all Guest levels thru the PDE are present, getting the
3092 * PD and PDE in the process.
3093 */
3094 int rc = VINF_SUCCESS;
3095# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3096# if PGM_GST_TYPE == PGM_TYPE_32BIT
3097 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3098 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3099# elif PGM_GST_TYPE == PGM_TYPE_PAE
3100 unsigned iPDSrc;
3101 X86PDPE PdpeSrc;
3102 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3103 if (!pPDSrc)
3104 return VINF_SUCCESS; /* not present */
3105# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3106 unsigned iPDSrc;
3107 PX86PML4E pPml4eSrc;
3108 X86PDPE PdpeSrc;
3109 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3110 if (!pPDSrc)
3111 return VINF_SUCCESS; /* not present */
3112# endif
3113 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3114# else
3115 PGSTPD pPDSrc = NULL;
3116 const unsigned iPDSrc = 0;
3117 GSTPDE PdeSrc;
3118
3119 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3120 PdeSrc.n.u1Present = 1;
3121 PdeSrc.n.u1Write = 1;
3122 PdeSrc.n.u1Accessed = 1;
3123 PdeSrc.n.u1User = 1;
3124# endif
3125
3126 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3127 {
3128 PVM pVM = pVCpu->CTX_SUFF(pVM);
3129 pgmLock(pVM);
3130
3131# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3132 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3133# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3134 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3135 PX86PDPAE pPDDst;
3136 X86PDEPAE PdeDst;
3137# if PGM_GST_TYPE != PGM_TYPE_PAE
3138 X86PDPE PdpeSrc;
3139
3140 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3141 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3142# endif
3143 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3144 if (rc != VINF_SUCCESS)
3145 {
3146 pgmUnlock(pVM);
3147 AssertRC(rc);
3148 return rc;
3149 }
3150 Assert(pPDDst);
3151 PdeDst = pPDDst->a[iPDDst];
3152
3153# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3154 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3155 PX86PDPAE pPDDst;
3156 X86PDEPAE PdeDst;
3157
3158# if PGM_GST_TYPE == PGM_TYPE_PROT
3159 /* AMD-V nested paging */
3160 X86PML4E Pml4eSrc;
3161 X86PDPE PdpeSrc;
3162 PX86PML4E pPml4eSrc = &Pml4eSrc;
3163
3164 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3165 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3166 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3167# endif
3168
3169 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3170 if (rc != VINF_SUCCESS)
3171 {
3172 pgmUnlock(pVM);
3173 AssertRC(rc);
3174 return rc;
3175 }
3176 Assert(pPDDst);
3177 PdeDst = pPDDst->a[iPDDst];
3178# endif
3179 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3180 {
3181 if (!PdeDst.n.u1Present)
3182 {
3183 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3184 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3185 }
3186 else
3187 {
3188 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3189 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3190 * makes no sense to prefetch more than one page.
3191 */
3192 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3193 if (RT_SUCCESS(rc))
3194 rc = VINF_SUCCESS;
3195 }
3196 }
3197 pgmUnlock(pVM);
3198 }
3199 return rc;
3200
3201#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3202 return VINF_SUCCESS; /* ignore */
3203#endif
3204}
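/*
 * Illustrative sketch only, never compiled: a hypothetical caller of the PrefetchPage
 * instance above. Real callers reach it through PGM's per-mode dispatch; this merely
 * shows the expected contract of the current template instantiation.
 */
#if 0
static int pgmBthExamplePrefetch(PVMCPU pVCpu, RTGCPTR GCPtrPage)
{
    /* Returns VINF_SUCCESS on success and also when the guest mapping is simply not present. */
    int rc = PGM_BTH_NAME(PrefetchPage)(pVCpu, GCPtrPage);
    LogFlow(("pgmBthExamplePrefetch: %RGv -> %Rrc\n", GCPtrPage, rc));
    return rc;
}
#endif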
3205
3206
3207
3208
3209/**
3210 * Syncs a page during a PGMVerifyAccess() call.
3211 *
3212 * @returns VBox status code (informational status codes included).
3213 * @param pVCpu The VMCPU handle.
3214 * @param GCPtrPage The address of the page to sync.
3215 * @param fPage The effective guest page flags.
3216 * @param uErr The trap error code.
3217 */
3218PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3219{
3220 PVM pVM = pVCpu->CTX_SUFF(pVM);
3221
3222 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3223
3224 Assert(!HWACCMIsNestedPagingActive(pVM));
3225#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3226 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3227
3228# ifndef IN_RING0
3229 if (!(fPage & X86_PTE_US))
3230 {
3231 /*
3232 * Mark this page as safe.
3233 */
3234 /** @todo not correct for pages that contain both code and data!! */
3235 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3236 CSAMMarkPage(pVM, GCPtrPage, true);
3237 }
3238# endif
3239
3240 /*
3241 * Get guest PD and index.
3242 */
3243# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3244# if PGM_GST_TYPE == PGM_TYPE_32BIT
3245 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3246 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3247# elif PGM_GST_TYPE == PGM_TYPE_PAE
3248 unsigned iPDSrc = 0;
3249 X86PDPE PdpeSrc;
3250 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3251
3252 if (!pPDSrc)
3253 {
3254 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3255 return VINF_EM_RAW_GUEST_TRAP;
3256 }
3257# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3258 unsigned iPDSrc;
3259 PX86PML4E pPml4eSrc;
3260 X86PDPE PdpeSrc;
3261 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3262 if (!pPDSrc)
3263 {
3264 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3265 return VINF_EM_RAW_GUEST_TRAP;
3266 }
3267# endif
3268# else
3269 PGSTPD pPDSrc = NULL;
3270 const unsigned iPDSrc = 0;
3271# endif
3272 int rc = VINF_SUCCESS;
3273
3274 pgmLock(pVM);
3275
3276 /*
3277 * First check if the shadow pd is present.
3278 */
3279# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3280 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3281# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3282 PX86PDEPAE pPdeDst;
3283 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3284 PX86PDPAE pPDDst;
3285# if PGM_GST_TYPE != PGM_TYPE_PAE
3286 X86PDPE PdpeSrc;
3287
3288 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3289 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3290# endif
3291 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3292 if (rc != VINF_SUCCESS)
3293 {
3294 pgmUnlock(pVM);
3295 AssertRC(rc);
3296 return rc;
3297 }
3298 Assert(pPDDst);
3299 pPdeDst = &pPDDst->a[iPDDst];
3300
3301# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3302 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3303 PX86PDPAE pPDDst;
3304 PX86PDEPAE pPdeDst;
3305
3306# if PGM_GST_TYPE == PGM_TYPE_PROT
3307 /* AMD-V nested paging */
3308 X86PML4E Pml4eSrc;
3309 X86PDPE PdpeSrc;
3310 PX86PML4E pPml4eSrc = &Pml4eSrc;
3311
3312 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3313 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3314 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3315# endif
3316
3317 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3318 if (rc != VINF_SUCCESS)
3319 {
3320 pgmUnlock(pVM);
3321 AssertRC(rc);
3322 return rc;
3323 }
3324 Assert(pPDDst);
3325 pPdeDst = &pPDDst->a[iPDDst];
3326# endif
3327
3328# if defined(IN_RC)
3329 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3330 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3331# endif
3332
3333 if (!pPdeDst->n.u1Present)
3334 {
3335 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3336 if (rc != VINF_SUCCESS)
3337 {
3338# if defined(IN_RC)
3339 /* Done with the dynamic pPdeDst mapping; release the lock taken above. */
3340 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3341# endif
3342 pgmUnlock(pVM);
3343 AssertRC(rc);
3344 return rc;
3345 }
3346 }
3347
3348# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3349 /* Check for dirty bit fault */
3350 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3351 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3352 Log(("PGMVerifyAccess: success (dirty)\n"));
3353 else
3354 {
3355 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3356# else
3357 {
3358 GSTPDE PdeSrc;
3359 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3360 PdeSrc.n.u1Present = 1;
3361 PdeSrc.n.u1Write = 1;
3362 PdeSrc.n.u1Accessed = 1;
3363 PdeSrc.n.u1User = 1;
3364
3365# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3366 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3367 if (uErr & X86_TRAP_PF_US)
3368 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3369 else /* supervisor */
3370 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3371
3372 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3373 if (RT_SUCCESS(rc))
3374 {
3375 /* Page was successfully synced */
3376 Log2(("PGMVerifyAccess: success (sync)\n"));
3377 rc = VINF_SUCCESS;
3378 }
3379 else
3380 {
3381 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3382 rc = VINF_EM_RAW_GUEST_TRAP;
3383 }
3384 }
3385# if defined(IN_RC)
3386 /* Done with the dynamic pPdeDst mapping; release the lock taken above. */
3387 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3388# endif
3389 pgmUnlock(pVM);
3390 return rc;
3391
3392#else /* unsupported guest/shadow paging combination */
3393
3394 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3395 return VERR_INTERNAL_ERROR;
3396#endif /* unsupported guest/shadow paging combination */
3397}
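/*
 * Illustrative sketch only, never compiled: a hypothetical wrapper around the function
 * above, showing how its two interesting outcomes are usually interpreted (VINF_SUCCESS:
 * the page was synced for the requested access; VINF_EM_RAW_GUEST_TRAP: let the guest fault).
 */
#if 0
static bool pgmBthExampleAccessAllowed(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
{
    int rc = PGM_BTH_NAME(VerifyAccessSyncPage)(pVCpu, GCPtrPage, fPage, uErr);
    return rc == VINF_SUCCESS;
}
#endif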
3398
3399#undef MY_STAM_COUNTER_INC
3400#define MY_STAM_COUNTER_INC(a) do { } while (0)
3401
3402
3403/**
3404 * Syncs the paging hierarchy starting at CR3.
3405 *
3406 * @returns VBox status code, no specials.
3407 * @param pVCpu The VMCPU handle.
3408 * @param cr0 Guest context CR0 register
3409 * @param cr3 Guest context CR3 register
3410 * @param cr4 Guest context CR4 register
3411 * @param fGlobal Including global page directories or not
3412 */
3413PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3414{
3415 PVM pVM = pVCpu->CTX_SUFF(pVM);
3416
3417 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
3418 fGlobal = true; /* Change this CR3 reload to be a global one. */
3419
3420 LogFlow(("SyncCR3 %d\n", fGlobal));
3421
3422#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3423
3424 pgmLock(pVM);
3425# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3426 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3427 if (pPool->cDirtyPages)
3428 pgmPoolResetDirtyPages(pVM);
3429# endif
3430
3431 /*
3432 * Update page access handlers.
3433 * Virtual handlers are always flushed, while physical handlers are only flushed on demand.
3434 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3435 * have to look into that later because it will have a bad influence on performance.
3436 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3437 * bird: Yes, but that won't work for aliases.
3438 */
3439 /** @todo this MUST go away. See #1557. */
3440 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3441 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3442 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3443 pgmUnlock(pVM);
3444#endif /* !NESTED && !EPT */
3445
3446#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3447 /*
3448 * Nested / EPT - almost no work.
3449 */
3450 /** @todo check if this is really necessary; the call does it as well... */
3451 HWACCMFlushTLB(pVCpu);
3452 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3453 return VINF_SUCCESS;
3454
3455#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3456 /*
3457 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3458 * out the shadow parts when the guest modifies its tables.
3459 */
3460 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3461 return VINF_SUCCESS;
3462
3463#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3464
3465# ifndef PGM_WITHOUT_MAPPINGS
3466 /*
3467 * Check for and resolve conflicts with our guest mappings if they
3468 * are enabled and not fixed.
3469 */
3470 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3471 {
3472 int rc = pgmMapResolveConflicts(pVM);
3473 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3474 if (rc == VINF_PGM_SYNC_CR3)
3475 {
3476 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3477 return VINF_PGM_SYNC_CR3;
3478 }
3479 }
3480# else
3481 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3482# endif
3483 return VINF_SUCCESS;
3484#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3485}
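/*
 * Illustrative sketch only, never compiled: how the VMCPU_FF_PGM_SYNC_CR3 force-action
 * flag upgrades an ordinary CR3 reload into a global sync, mirroring the check at the
 * top of SyncCR3 above. The helper name is hypothetical.
 */
#if 0
DECLINLINE(bool) pgmBthExampleWantGlobalSync(PVMCPU pVCpu, bool fGlobal)
{
    return fGlobal || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
}
#endif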
3486
3487
3488
3489
3490#ifdef VBOX_STRICT
3491#ifdef IN_RC
3492# undef AssertMsgFailed
3493# define AssertMsgFailed Log
3494#endif
3495#ifdef IN_RING3
3496# include <VBox/dbgf.h>
3497
3498/**
3499 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3500 *
3501 * @returns VBox status code (VINF_SUCCESS).
3502 * @param cr3 The root of the hierarchy.
3503 * @param cr4 The cr4; only PAE and PSE are currently used.
3504 * @param fLongMode Set if long mode, false if not long mode.
3505 * @param cMaxDepth Number of levels to dump.
3506 * @param pHlp Pointer to the output functions.
3507 */
3508RT_C_DECLS_BEGIN
3509VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3510RT_C_DECLS_END
3511
3512#endif
3513
3514/**
3515 * Checks that the shadow page table is in sync with the guest one.
3516 *
3517 * @returns The number of errors.
3519 * @param pVCpu The VMCPU handle.
3520 * @param cr3 Guest context CR3 register
3521 * @param cr4 Guest context CR4 register
3522 * @param GCPtr Where to start. Defaults to 0.
3523 * @param cb How much to check. Defaults to everything.
3524 */
3525PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3526{
3527#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3528 return 0;
3529#else
3530 unsigned cErrors = 0;
3531 PVM pVM = pVCpu->CTX_SUFF(pVM);
3532 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3533
3534#if PGM_GST_TYPE == PGM_TYPE_PAE
3535 /** @todo currently broken; crashes below somewhere */
3536 AssertFailed();
3537#endif
3538
3539#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3540 || PGM_GST_TYPE == PGM_TYPE_PAE \
3541 || PGM_GST_TYPE == PGM_TYPE_AMD64
3542
3543# if PGM_GST_TYPE == PGM_TYPE_32BIT
3544 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3545# else
3546 bool fBigPagesSupported = true;
3547# endif
3548 PPGMCPU pPGM = &pVCpu->pgm.s;
3549 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3550 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3551# ifndef IN_RING0
3552 RTHCPHYS HCPhys; /* general usage. */
3553# endif
3554 int rc;
3555
3556 /*
3557 * Check that the Guest CR3 and all its mappings are correct.
3558 */
3559 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3560 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3561 false);
3562# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3563# if PGM_GST_TYPE == PGM_TYPE_32BIT
3564 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3565# else
3566 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3567# endif
3568 AssertRCReturn(rc, 1);
3569 HCPhys = NIL_RTHCPHYS;
3570 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3571 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3572# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3573 pgmGstGet32bitPDPtr(pPGM);
3574 RTGCPHYS GCPhys;
3575 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3576 AssertRCReturn(rc, 1);
3577 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3578# endif
3579# endif /* !IN_RING0 */
3580
3581 /*
3582 * Get and check the Shadow CR3.
3583 */
3584# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3585 unsigned cPDEs = X86_PG_ENTRIES;
3586 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3587# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3588# if PGM_GST_TYPE == PGM_TYPE_32BIT
3589 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3590# else
3591 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3592# endif
3593 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3594# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3595 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3596 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3597# endif
3598 if (cb != ~(RTGCPTR)0)
3599 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3600
3601/** @todo call the other two PGMAssert*() functions. */
3602
3603# if PGM_GST_TYPE == PGM_TYPE_AMD64
3604 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3605
3606 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3607 {
3608 PPGMPOOLPAGE pShwPdpt = NULL;
3609 PX86PML4E pPml4eSrc;
3610 PX86PML4E pPml4eDst;
3611 RTGCPHYS GCPhysPdptSrc;
3612
3613 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3614 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3615
3616 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3617 if (!pPml4eDst->n.u1Present)
3618 {
3619 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3620 continue;
3621 }
3622
3623 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3624 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3625
3626 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3627 {
3628 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3629 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3630 cErrors++;
3631 continue;
3632 }
3633
3634 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3635 {
3636 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3637 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3638 cErrors++;
3639 continue;
3640 }
3641
3642 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3643 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3644 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3645 {
3646 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3647 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3648 cErrors++;
3649 continue;
3650 }
3651# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3652 {
3653# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3654
3655# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3656 /*
3657 * Check the PDPTEs too.
3658 */
3659 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3660
3661 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3662 {
3663 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3664 PPGMPOOLPAGE pShwPde = NULL;
3665 PX86PDPE pPdpeDst;
3666 RTGCPHYS GCPhysPdeSrc;
3667# if PGM_GST_TYPE == PGM_TYPE_PAE
3668 X86PDPE PdpeSrc;
3669 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3670 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3671# else
3672 PX86PML4E pPml4eSrcIgn;
3673 X86PDPE PdpeSrc;
3674 PX86PDPT pPdptDst;
3675 PX86PDPAE pPDDst;
3676 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3677
3678 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3679 if (rc != VINF_SUCCESS)
3680 {
3681 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3682 GCPtr += 512 * _2M;
3683 continue; /* next PDPTE */
3684 }
3685 Assert(pPDDst);
3686# endif
3687 Assert(iPDSrc == 0);
3688
3689 pPdpeDst = &pPdptDst->a[iPdpt];
3690
3691 if (!pPdpeDst->n.u1Present)
3692 {
3693 GCPtr += 512 * _2M;
3694 continue; /* next PDPTE */
3695 }
3696
3697 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3698 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3699
3700 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3701 {
3702 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3703 GCPtr += 512 * _2M;
3704 cErrors++;
3705 continue;
3706 }
3707
3708 if (GCPhysPdeSrc != pShwPde->GCPhys)
3709 {
3710# if PGM_GST_TYPE == PGM_TYPE_AMD64
3711 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3712# else
3713 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3714# endif
3715 GCPtr += 512 * _2M;
3716 cErrors++;
3717 continue;
3718 }
3719
3720# if PGM_GST_TYPE == PGM_TYPE_AMD64
3721 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3722 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3723 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3724 {
3725 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3726 GCPtr += 512 * _2M;
3727 cErrors++;
3728 continue;
3729 }
3730# endif
3731
3732# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3733 {
3734# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3735# if PGM_GST_TYPE == PGM_TYPE_32BIT
3736 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3737# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3738 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3739# endif
3740# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3741 /*
3742 * Iterate the shadow page directory.
3743 */
3744 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3745 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3746
3747 for (;
3748 iPDDst < cPDEs;
3749 iPDDst++, GCPtr += cIncrement)
3750 {
3751# if PGM_SHW_TYPE == PGM_TYPE_PAE
3752 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3753# else
3754 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3755# endif
3756 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3757 {
3758 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3759 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3760 {
3761 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3762 cErrors++;
3763 continue;
3764 }
3765 }
3766 else if ( (PdeDst.u & X86_PDE_P)
3767 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3768 )
3769 {
3770 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3771 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3772 if (!pPoolPage)
3773 {
3774 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3775 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3776 cErrors++;
3777 continue;
3778 }
3779 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3780
3781 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3782 {
3783 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3784 GCPtr, (uint64_t)PdeDst.u));
3785 cErrors++;
3786 }
3787
3788 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3789 {
3790 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3791 GCPtr, (uint64_t)PdeDst.u));
3792 cErrors++;
3793 }
3794
3795 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3796 if (!PdeSrc.n.u1Present)
3797 {
3798 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3799 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3800 cErrors++;
3801 continue;
3802 }
3803
3804 if ( !PdeSrc.b.u1Size
3805 || !fBigPagesSupported)
3806 {
3807 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3808# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3809 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3810# endif
3811 }
3812 else
3813 {
3814# if PGM_GST_TYPE == PGM_TYPE_32BIT
3815 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3816 {
3817 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3818 GCPtr, (uint64_t)PdeSrc.u));
3819 cErrors++;
3820 continue;
3821 }
3822# endif
3823 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3824# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3825 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3826# endif
3827 }
3828
3829 if ( pPoolPage->enmKind
3830 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3831 {
3832 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3833 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3834 cErrors++;
3835 }
3836
3837 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3838 if (!pPhysPage)
3839 {
3840 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3841 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3842 cErrors++;
3843 continue;
3844 }
3845
3846 if (GCPhysGst != pPoolPage->GCPhys)
3847 {
3848 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3849 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3850 cErrors++;
3851 continue;
3852 }
3853
3854 if ( !PdeSrc.b.u1Size
3855 || !fBigPagesSupported)
3856 {
3857 /*
3858 * Page Table.
3859 */
3860 const GSTPT *pPTSrc;
3861 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3862 if (RT_FAILURE(rc))
3863 {
3864 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3865 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3866 cErrors++;
3867 continue;
3868 }
3869 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3870 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3871 {
3872 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3873 // (This problem will go away when/if we shadow multiple CR3s.)
3874 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3875 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3876 cErrors++;
3877 continue;
3878 }
3879 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3880 {
3881 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3882 GCPtr, (uint64_t)PdeDst.u));
3883 cErrors++;
3884 continue;
3885 }
3886
3887 /* iterate the page table. */
3888# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3889 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3890 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3891# else
3892 const unsigned offPTSrc = 0;
3893# endif
3894 for (unsigned iPT = 0, off = 0;
3895 iPT < RT_ELEMENTS(pPTDst->a);
3896 iPT++, off += PAGE_SIZE)
3897 {
3898 const SHWPTE PteDst = pPTDst->a[iPT];
3899
3900 /* skip not-present entries. */
3901 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3902 continue;
3903 Assert(PteDst.n.u1Present);
3904
3905 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3906 if (!PteSrc.n.u1Present)
3907 {
3908# ifdef IN_RING3
3909 PGMAssertHandlerAndFlagsInSync(pVM);
3910 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3911# endif
3912 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3913 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3914 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3915 cErrors++;
3916 continue;
3917 }
3918
3919 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3920# if 1 /** @todo sync accessed bit properly... */
3921 fIgnoreFlags |= X86_PTE_A;
3922# endif
3923
3924 /* match the physical addresses */
3925 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3926 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3927
3928# ifdef IN_RING3
3929 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3930 if (RT_FAILURE(rc))
3931 {
3932 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3933 {
3934 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3935 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3936 cErrors++;
3937 continue;
3938 }
3939 }
3940 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3941 {
3942 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3943 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3944 cErrors++;
3945 continue;
3946 }
3947# endif
3948
3949 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3950 if (!pPhysPage)
3951 {
3952# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3953 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3954 {
3955 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3956 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3957 cErrors++;
3958 continue;
3959 }
3960# endif
3961 if (PteDst.n.u1Write)
3962 {
3963 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3964 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3965 cErrors++;
3966 }
3967 fIgnoreFlags |= X86_PTE_RW;
3968 }
3969 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3970 {
3971 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3972 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3973 cErrors++;
3974 continue;
3975 }
3976
3977 /* flags */
3978 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3979 {
3980 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3981 {
3982 if (PteDst.n.u1Write)
3983 {
3984 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3985 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3986 cErrors++;
3987 continue;
3988 }
3989 fIgnoreFlags |= X86_PTE_RW;
3990 }
3991 else
3992 {
3993 if (PteDst.n.u1Present)
3994 {
3995 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3996 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3997 cErrors++;
3998 continue;
3999 }
4000 fIgnoreFlags |= X86_PTE_P;
4001 }
4002 }
4003 else
4004 {
4005 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4006 {
4007 if (PteDst.n.u1Write)
4008 {
4009 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4010 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4011 cErrors++;
4012 continue;
4013 }
4014 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4015 {
4016 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4017 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4018 cErrors++;
4019 continue;
4020 }
4021 if (PteDst.n.u1Dirty)
4022 {
4023 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4024 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4025 cErrors++;
4026 }
4027# if 0 /** @todo sync access bit properly... */
4028 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4029 {
4030 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4031 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4032 cErrors++;
4033 }
4034 fIgnoreFlags |= X86_PTE_RW;
4035# else
4036 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4037# endif
4038 }
4039 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4040 {
4041 /* access bit emulation (not implemented). */
4042 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4043 {
4044 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4045 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4046 cErrors++;
4047 continue;
4048 }
4049 if (!PteDst.n.u1Accessed)
4050 {
4051 AssertMsgFailed(("!ACCESSED page at %RGv is missing the accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4052 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4053 cErrors++;
4054 }
4055 fIgnoreFlags |= X86_PTE_P;
4056 }
4057# ifdef DEBUG_sandervl
4058 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4059# endif
4060 }
4061
4062 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4063 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4064 )
4065 {
4066 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4067 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4068 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4069 cErrors++;
4070 continue;
4071 }
4072 } /* foreach PTE */
4073 }
4074 else
4075 {
4076 /*
4077 * Big Page.
4078 */
4079 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4080 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4081 {
4082 if (PdeDst.n.u1Write)
4083 {
4084 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4085 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4086 cErrors++;
4087 continue;
4088 }
4089 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4090 {
4091 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4092 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4093 cErrors++;
4094 continue;
4095 }
4096# if 0 /** @todo sync access bit properly... */
4097 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4098 {
4099 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4100 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4101 cErrors++;
4102 }
4103 fIgnoreFlags |= X86_PTE_RW;
4104# else
4105 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4106# endif
4107 }
4108 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4109 {
4110 /* access bit emulation (not implemented). */
4111 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4112 {
4113 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4114 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4115 cErrors++;
4116 continue;
4117 }
4118 if (!PdeDst.n.u1Accessed)
4119 {
4120 AssertMsgFailed(("!ACCESSED page at %RGv is missing the accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4121 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4122 cErrors++;
4123 }
4124 fIgnoreFlags |= X86_PTE_P;
4125 }
4126
4127 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4128 {
4129 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4130 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4131 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4132 cErrors++;
4133 }
4134
4135 /* iterate the page table. */
4136 for (unsigned iPT = 0, off = 0;
4137 iPT < RT_ELEMENTS(pPTDst->a);
4138 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4139 {
4140 const SHWPTE PteDst = pPTDst->a[iPT];
4141
4142 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4143 {
4144 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4145 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4146 cErrors++;
4147 }
4148
4149 /* skip not-present entries. */
4150 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4151 continue;
4152
4153 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4154
4155 /* match the physical addresses */
4156 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4157
4158# ifdef IN_RING3
4159 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4160 if (RT_FAILURE(rc))
4161 {
4162 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4163 {
4164 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4165 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4166 cErrors++;
4167 }
4168 }
4169 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4170 {
4171 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4172 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4173 cErrors++;
4174 continue;
4175 }
4176# endif
4177 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4178 if (!pPhysPage)
4179 {
4180# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4181 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4182 {
4183 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4184 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4185 cErrors++;
4186 continue;
4187 }
4188# endif
4189 if (PteDst.n.u1Write)
4190 {
4191 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4192 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4193 cErrors++;
4194 }
4195 fIgnoreFlags |= X86_PTE_RW;
4196 }
4197 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4198 {
4199 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4200 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4201 cErrors++;
4202 continue;
4203 }
4204
4205 /* flags */
4206 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4207 {
4208 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4209 {
4210 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4211 {
4212 if (PteDst.n.u1Write)
4213 {
4214 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4215 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4216 cErrors++;
4217 continue;
4218 }
4219 fIgnoreFlags |= X86_PTE_RW;
4220 }
4221 }
4222 else
4223 {
4224 if (PteDst.n.u1Present)
4225 {
4226 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4227 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4228 cErrors++;
4229 continue;
4230 }
4231 fIgnoreFlags |= X86_PTE_P;
4232 }
4233 }
4234
4235 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4236 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4237 )
4238 {
4239 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4240 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4241 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4242 cErrors++;
4243 continue;
4244 }
4245 } /* for each PTE */
4246 }
4247 }
4248 /* not present */
4249
4250 } /* for each PDE */
4251
4252 } /* for each PDPTE */
4253
4254 } /* for each PML4E */
4255
4256# ifdef DEBUG
4257 if (cErrors)
4258 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4259# endif
4260
4261#endif /* GST == 32BIT, PAE or AMD64 */
4262 return cErrors;
4263
4264#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4265}
4266#endif /* VBOX_STRICT */
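/*
 * Illustrative sketch only, never compiled: invoking the strict-build consistency check
 * above for the entire address space (GCPtr = 0, cb = everything). The wrapper is
 * hypothetical and would only make sense in VBOX_STRICT builds.
 */
#if 0
static unsigned pgmBthExampleAssertWholeCR3(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4)
{
    unsigned cErrors = PGM_BTH_NAME(AssertCR3)(pVCpu, cr3, cr4, 0, ~(RTGCPTR)0);
    AssertMsg(!cErrors, ("cErrors=%u\n", cErrors));
    return cErrors;   /* the return value is the number of inconsistencies found */
}
#endif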
4267
4268
4269/**
4270 * Sets up the CR3 for shadow paging.
4271 *
4272 * @returns Strict VBox status code.
4273 * @retval VINF_SUCCESS.
4274 *
4275 * @param pVCpu The VMCPU handle.
4276 * @param GCPhysCR3 The physical address in the CR3 register.
4277 */
4278PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4279{
4280 PVM pVM = pVCpu->CTX_SUFF(pVM);
4281
4282 /* Update guest paging info. */
4283#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4284 || PGM_GST_TYPE == PGM_TYPE_PAE \
4285 || PGM_GST_TYPE == PGM_TYPE_AMD64
4286
4287 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4288
4289 /*
4290 * Map the page CR3 points at.
4291 */
4292 RTHCPTR HCPtrGuestCR3;
4293 RTHCPHYS HCPhysGuestCR3;
4294 pgmLock(pVM);
4295 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4296 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4297 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4298 /** @todo this needs some reworking wrt. locking. */
4299# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4300 HCPtrGuestCR3 = NIL_RTHCPTR;
4301 int rc = VINF_SUCCESS;
4302# else
4303 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4304# endif
4305 pgmUnlock(pVM);
4306 if (RT_SUCCESS(rc))
4307 {
4308 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4309 if (RT_SUCCESS(rc))
4310 {
4311# ifdef IN_RC
4312 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4313# endif
4314# if PGM_GST_TYPE == PGM_TYPE_32BIT
4315 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4316# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4317 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4318# endif
4319 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4320
4321# elif PGM_GST_TYPE == PGM_TYPE_PAE
4322 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4323 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4324# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4325 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4326# endif
4327 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4328 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4329
4330 /*
4331 * Map the 4 PDs too.
4332 */
4333 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4334 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4335 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4336 {
4337 if (pGuestPDPT->a[i].n.u1Present)
4338 {
4339 RTHCPTR HCPtr;
4340 RTHCPHYS HCPhys;
4341 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4342 pgmLock(pVM);
4343 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4344 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4345 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4346# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4347 HCPtr = NIL_RTHCPTR;
4348 int rc2 = VINF_SUCCESS;
4349# else
4350 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4351# endif
4352 pgmUnlock(pVM);
4353 if (RT_SUCCESS(rc2))
4354 {
4355 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4356 AssertRCReturn(rc, rc);
4357
4358 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4359# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4360 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4361# endif
4362 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4363 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4364# ifdef IN_RC
4365 PGM_INVL_PG(pVCpu, GCPtr);
4366# endif
4367 continue;
4368 }
4369 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4370 }
4371
4372 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4373# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4374 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4375# endif
4376 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4377 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4378# ifdef IN_RC
4379 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4380# endif
4381 }
4382
4383# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4384 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4385# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4386 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4387# endif
4388# endif
4389 }
4390 else
4391 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4392 }
4393 else
4394 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4395
4396#else /* prot/real stub */
4397 int rc = VINF_SUCCESS;
4398#endif
4399
4400 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4401# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4402 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4403 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4404 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4405 && PGM_GST_TYPE != PGM_TYPE_PROT))
4406
4407 Assert(!HWACCMIsNestedPagingActive(pVM));
4408
4409 /*
4410 * Update the shadow root page as well since that's not fixed.
4411 */
4412 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4413 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4414 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4415 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4416 PPGMPOOLPAGE pNewShwPageCR3;
4417
4418 pgmLock(pVM);
4419
4420# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4421 if (pPool->cDirtyPages)
4422 pgmPoolResetDirtyPages(pVM);
4423# endif
4424
4425 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4426 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4427 AssertFatalRC(rc);
4428 rc = VINF_SUCCESS;
4429
4430# ifdef IN_RC
4431 /*
4432 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4433 * state will be inconsistent! Flush important things now while
4434 * we still can and then make sure there are no ring-3 calls.
4435 */
4436 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4437 VMMRZCallRing3Disable(pVCpu);
4438# endif
4439
4440 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4441 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4442 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4443# ifdef IN_RING0
4444 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4445 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4446# elif defined(IN_RC)
4447 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4448 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4449# else
4450 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4451 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4452# endif
4453
4454# ifndef PGM_WITHOUT_MAPPINGS
4455 /*
4456 * Apply all hypervisor mappings to the new CR3.
4457 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4458 * make sure we check for conflicts in the new CR3 root.
4459 */
4460# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4461 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4462# endif
4463 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4464 AssertRCReturn(rc, rc);
4465# endif
4466
4467 /* Set the current hypervisor CR3. */
4468 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4469 SELMShadowCR3Changed(pVM, pVCpu);
4470
4471# ifdef IN_RC
4472 /* NOTE: The state is consistent again. */
4473 VMMRZCallRing3Enable(pVCpu);
4474# endif
4475
4476 /* Clean up the old CR3 root. */
4477 if ( pOldShwPageCR3
4478 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4479 {
4480 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4481# ifndef PGM_WITHOUT_MAPPINGS
4482 /* Remove the hypervisor mappings from the shadow page table. */
4483 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4484# endif
4485 /* Mark the page as unlocked; allow flushing again. */
4486 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4487
4488 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4489 }
4490 pgmUnlock(pVM);
4491# endif
4492
4493 return rc;
4494}
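/*
 * Illustrative sketch only, never compiled: the guest-context layout MapCR3 establishes
 * for a PAE guest, as implied by the loop above: the PDPT page at GCPtrCR3Mapping and,
 * for each present PDPT entry, the page directory on one of the following pages. The
 * helper name is hypothetical.
 */
#if 0
# if PGM_GST_TYPE == PGM_TYPE_PAE
DECLINLINE(RTRCPTR) pgmBthExamplePaePDMapping(PVM pVM, unsigned iPD)
{
    Assert(iPD < X86_PG_PAE_PDPE_ENTRIES);
    return (RTRCPTR)((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + (iPD + 1) * PAGE_SIZE);
}
# endif
#endif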
4495
4496/**
4497 * Unmaps the shadow CR3.
4498 *
4499 * @returns VBox status code, no specials.
4500 * @param pVCpu The VMCPU handle.
4501 */
4502PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4503{
4504 LogFlow(("UnmapCR3\n"));
4505
4506 int rc = VINF_SUCCESS;
4507 PVM pVM = pVCpu->CTX_SUFF(pVM);
4508
4509 /*
4510 * Update guest paging info.
4511 */
4512#if PGM_GST_TYPE == PGM_TYPE_32BIT
4513 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4514# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4515 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4516# endif
4517 pVCpu->pgm.s.pGst32BitPdRC = 0;
4518
4519#elif PGM_GST_TYPE == PGM_TYPE_PAE
4520 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4521# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4522 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4523# endif
4524 pVCpu->pgm.s.pGstPaePdptRC = 0;
4525 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4526 {
4527 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4528# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4529 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4530# endif
4531 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4532 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4533 }
4534
4535#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4536 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4537# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4538 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4539# endif
4540
4541#else /* prot/real mode stub */
4542 /* nothing to do */
4543#endif
4544
4545#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4546 /*
4547 * Update shadow paging info.
4548 */
4549# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4550 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4551 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4552
4553# if PGM_GST_TYPE != PGM_TYPE_REAL
4554 Assert(!HWACCMIsNestedPagingActive(pVM));
4555# endif
4556
4557 pgmLock(pVM);
4558
4559# ifndef PGM_WITHOUT_MAPPINGS
4560 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4561 /* Remove the hypervisor mappings from the shadow page table. */
4562 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4563# endif
4564
4565 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4566 {
4567 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4568
4569 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4570
4571# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4572 if (pPool->cDirtyPages)
4573 pgmPoolResetDirtyPages(pVM);
4574# endif
4575
4576 /* Mark the page as unlocked; allow flushing again. */
4577 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4578
4579 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4580 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4581 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4582 pVCpu->pgm.s.pShwPageCR3RC = 0;
4583 pVCpu->pgm.s.iShwUser = 0;
4584 pVCpu->pgm.s.iShwUserTable = 0;
4585 }
4586 pgmUnlock(pVM);
4587# endif
4588#endif /* !IN_RC*/
4589
4590 return rc;
4591}
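/*
 * Illustrative sketch only, never compiled: the usual pairing of the two functions above
 * when the guest loads a new CR3 value. The wrapper is hypothetical; the real flow runs
 * through PGM's CR3 tracking and mode switching code.
 */
#if 0
static int pgmBthExampleReloadCR3(PVMCPU pVCpu, RTGCPHYS GCPhysNewCR3)
{
    int rc = PGM_BTH_NAME(UnmapCR3)(pVCpu);                 /* drop the old guest/shadow CR3 info */
    if (RT_SUCCESS(rc))
        rc = PGM_BTH_NAME(MapCR3)(pVCpu, GCPhysNewCR3);     /* map and shadow the new CR3 */
    return rc;
}
#endif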