VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ r28656

Last change on this file was r28656, checked in by vboxsync on 2010-04-23:

pgmPoolTrackPhysExtDerefGCPhys needs to check the PTE index as well in order not to kick out the wrong slot
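Roughly, the fix means the dereference path must match a tracking slot on both
the owning shadow pool page index and the PTE index before clearing it, so that
two references from the same pool page cannot knock each other's slot out. A
minimal sketch of the idea (the types, field and function names below are
assumed for illustration only and are not the real PGM pool definitions):

    #include <stdint.h>

    typedef struct EXAMPLEPHYSEXT
    {
        uint16_t aidx[3];   /* shadow pool pages referencing this physical page */
        uint16_t apte[3];   /* PTE index used within each of those shadow tables */
    } EXAMPLEPHYSEXT;

    static void exampleDerefSlot(EXAMPLEPHYSEXT *pExt, uint16_t idxShwPage, uint16_t iPte)
    {
        for (unsigned i = 0; i < 3; i++)
            if (   pExt->aidx[i] == idxShwPage
                && pExt->apte[i] == iPte)       /* the added PTE-index check */
            {
                pExt->aidx[i] = UINT16_MAX;     /* free only the matching slot */
                return;
            }
    }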

1/* $Id: PGMAllBth.h 28656 2010-04-23 14:43:35Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
34PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
35PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
37#ifdef VBOX_STRICT
38PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
39#endif
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte);
41PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
42PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
43RT_C_DECLS_END
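/*
 * Note: this is a paging template.  It is #included once for each supported
 * guest/shadow mode pair with PGM_GST_TYPE and PGM_SHW_TYPE set accordingly,
 * and PGM_BTH_DECL / PGM_BTH_NAME expand to mode-specific function names, so
 * the declarations above describe a whole family of concrete functions.
 */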
44
45
46/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
47#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
48# error "Invalid combination; PAE guest implies PAE shadow"
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
53# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
57 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
58# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
59#endif
60
61#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
62 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
63# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
64#endif
65
66
67#ifndef IN_RING3
68/**
69 * #PF Handler for raw-mode guest execution.
70 *
71 * @returns VBox status code (appropriate for trap handling and GC return).
72 *
73 * @param pVCpu VMCPU Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 * @param pfLockTaken PGM lock taken here or not (out)
78 */
79PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
80{
81 PVM pVM = pVCpu->CTX_SUFF(pVM);
82
83 *pfLockTaken = false;
84
85# if defined(IN_RC) && defined(VBOX_STRICT)
86 PGMDynCheckLocks(pVM);
87# endif
88
89# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
90 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
91 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
92
93# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
94 /*
95 * Hide the instruction fetch trap indicator for now.
96 */
97 /** @todo NXE will change this and we must fix NXE in the switcher too! */
98 if (uErr & X86_TRAP_PF_ID)
99 {
100 uErr &= ~X86_TRAP_PF_ID;
101 TRPMSetErrorCode(pVCpu, uErr);
102 }
103# endif
104
105 /*
106 * Get PDs.
107 */
108 int rc;
109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
110# if PGM_GST_TYPE == PGM_TYPE_32BIT
111 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
113
114# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
115
116# if PGM_GST_TYPE == PGM_TYPE_PAE
117 unsigned iPDSrc = 0; /* initialized to shut up gcc */
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120
121# elif PGM_GST_TYPE == PGM_TYPE_AMD64
122 unsigned iPDSrc = 0; /* initialized to shut up gcc */
123 PX86PML4E pPml4eSrc;
124 X86PDPE PdpeSrc;
125 PGSTPD pPDSrc;
126
127 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
128 Assert(pPml4eSrc);
129# endif
130
131 /* Quick check for a valid guest trap. (PAE & AMD64) */
132 if (!pPDSrc)
133 {
134# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
135 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
136# else
137 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# endif
139 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
140 TRPMSetErrorCode(pVCpu, uErr);
141 return VINF_EM_RAW_GUEST_TRAP;
142 }
143# endif
144
145# else /* !PGM_WITH_PAGING */
146 PGSTPD pPDSrc = NULL;
147 const unsigned iPDSrc = 0;
148# endif /* !PGM_WITH_PAGING */
149
150# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
151 /*
152 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
153 * where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.
154 */
155 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
156 && MMHyperIsInsideArea(pVM, pvFault))
157 {
158 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
159 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
160 return VINF_EM_RAW_EMULATE_INSTR;
161 }
162# endif
163
164 /* First check for a genuine guest page fault. */
165# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
166 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
167 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
168 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
169 if (rc == VINF_EM_RAW_GUEST_TRAP)
170 {
171 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
172 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
173 return rc;
174 }
175# endif /* PGM_WITH_PAGING */
176
177 /* Take the big lock now. */
178 *pfLockTaken = true;
179 pgmLock(pVM);
180
181 /* Fetch the guest PDE */
182# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
183 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
184# else
185 GSTPDE PdeSrc;
186 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
187 PdeSrc.n.u1Present = 1;
188 PdeSrc.n.u1Write = 1;
189 PdeSrc.n.u1Accessed = 1;
190 PdeSrc.n.u1User = 1;
191# endif
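 /* Real-mode and protected-mode-without-paging guests have no page tables of
    their own, so a fully permissive guest PDE is synthesized above; protection
    for these modes is enforced purely by the shadow/physical page state. */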
192
193# if PGM_SHW_TYPE == PGM_TYPE_32BIT
194 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
195 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
196
197# elif PGM_SHW_TYPE == PGM_TYPE_PAE
198 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
199
200 PX86PDPAE pPDDst;
201# if PGM_GST_TYPE != PGM_TYPE_PAE
202 X86PDPE PdpeSrc;
203
204 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
205 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
206# endif
207 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
208 if (rc != VINF_SUCCESS)
209 {
210 AssertRC(rc);
211 return rc;
212 }
213 Assert(pPDDst);
214
215# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
216 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
217 PX86PDPAE pPDDst;
218# if PGM_GST_TYPE == PGM_TYPE_PROT
219 /* AMD-V nested paging */
220 X86PML4E Pml4eSrc;
221 X86PDPE PdpeSrc;
222 PX86PML4E pPml4eSrc = &Pml4eSrc;
223
224 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
225 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
226 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
227# endif
228
229 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
230 if (rc != VINF_SUCCESS)
231 {
232 AssertRC(rc);
233 return rc;
234 }
235 Assert(pPDDst);
236
237# elif PGM_SHW_TYPE == PGM_TYPE_EPT
238 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
239 PEPTPD pPDDst;
240
241 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
242 if (rc != VINF_SUCCESS)
243 {
244 AssertRC(rc);
245 return rc;
246 }
247 Assert(pPDDst);
248# endif
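 /* Whichever shadow mode is active (32-bit, PAE, AMD64 or EPT), iPDDst and
    pPDDst now identify the shadow page-directory entry covering pvFault. */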
249
250# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
251 /* Dirty page handling. */
252 if (uErr & X86_TRAP_PF_RW) /* write fault? */
253 {
254 /*
255 * If we successfully correct the write protection fault due to dirty bit
256 * tracking, then return immediately.
257 */
258 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
259 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
260 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
261 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
262 {
263 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
264 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
265 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
266 return VINF_SUCCESS;
267 }
268 }
269
270# if 0 /* rarely useful; leave for debugging. */
271 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
272# endif
273# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
274
275 /*
276 * A common case is the not-present error caused by lazy page table syncing.
277 *
278 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
279 * so we can safely assume that the shadow PT is present when calling SyncPage later.
280 *
281 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
282 * of mapping conflict and defer to SyncCR3 in R3.
283 * (Again, we do NOT support access handlers for non-present guest pages.)
284 *
285 */
286 Assert(PdeSrc.n.u1Present);
287 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
288 && !pPDDst->a[iPDDst].n.u1Present
289 )
290 {
291 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
292 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
293 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
294 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
295 if (RT_SUCCESS(rc))
296 {
297 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
298 return rc;
299 }
300 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
301 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
302 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
303 return VINF_PGM_SYNC_CR3;
304 }
305
306# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
307 /*
308 * Check if this address is within any of our mappings.
309 *
310 * This is *very* fast and it's gonna save us a bit of effort below and prevent
311 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
312 * (BTW, it's impossible to have physical access handlers in a mapping.)
313 */
314 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
315 {
316 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
317 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
318 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
319 {
320 if (pvFault < pMapping->GCPtr)
321 break;
322 if (pvFault - pMapping->GCPtr < pMapping->cb)
323 {
324 /*
325 * The first thing we check is if we've got an undetected conflict.
326 */
327 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
328 {
329 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
330 while (iPT-- > 0)
331 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
332 {
333 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
334 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
335 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
336 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
337 return VINF_PGM_SYNC_CR3;
338 }
339 }
340
341 /*
342 * Check if the fault address is in a virtual page access handler range.
343 */
344 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
345 if ( pCur
346 && pvFault - pCur->Core.Key < pCur->cb
347 && uErr & X86_TRAP_PF_RW)
348 {
349# ifdef IN_RC
350 STAM_PROFILE_START(&pCur->Stat, h);
351 pgmUnlock(pVM);
352 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
353 pgmLock(pVM);
354 STAM_PROFILE_STOP(&pCur->Stat, h);
355# else
356 AssertFailed();
357 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
358# endif
359 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
360 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
361 return rc;
362 }
363
364 /*
365 * Pretend we're not here and let the guest handle the trap.
366 */
367 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
368 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
369 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
370 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
371 return VINF_EM_RAW_GUEST_TRAP;
372 }
373 }
374 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
375 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
376# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
377
378 /*
379 * Check if this fault address is flagged for special treatment,
380 * which means we'll have to figure out the physical address and
381 * check flags associated with it.
382 *
383 * ASSUME that we can limit any special access handling to pages
384 * in page tables which the guest believes to be present.
385 */
386 Assert(PdeSrc.n.u1Present);
387 {
388 RTGCPHYS GCPhys = NIL_RTGCPHYS;
389
390# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
391 if ( PdeSrc.b.u1Size
392# if PGM_GST_TYPE == PGM_TYPE_32BIT
393 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
394# endif
395 )
396 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
397 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
398 else
399 {
400 PGSTPT pPTSrc;
401 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
402 if (RT_SUCCESS(rc))
403 {
404 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
405 if (pPTSrc->a[iPTESrc].n.u1Present)
406 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
407 }
408 }
409# else
410 /* No paging so the fault address is the physical address */
411 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
412# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
413
414 /*
415 * If we have a GC address we'll check if it has any flags set.
416 */
417 if (GCPhys != NIL_RTGCPHYS)
418 {
419 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
420
421 PPGMPAGE pPage;
422 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
423 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
424 {
425 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
426 {
427 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
428 {
429 /*
430 * Physical page access handler.
431 */
432 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
433 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
434 if (pCur)
435 {
436# ifdef PGM_SYNC_N_PAGES
437 /*
438 * If the region is write protected and we got a page not present fault, then sync
439 * the pages. If the fault was caused by a read, then restart the instruction.
440 * In case of write access continue to the GC write handler.
441 *
442 * ASSUMES that there is only one handler per page or that they have similar write properties.
443 */
444 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
445 && !(uErr & X86_TRAP_PF_P))
446 {
447 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
448 if ( RT_FAILURE(rc)
449 || !(uErr & X86_TRAP_PF_RW)
450 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
451 {
452 AssertRC(rc);
453 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
454 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
455 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
456 return rc;
457 }
458 }
459# endif
460
461 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
462 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
463 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
464
465# if defined(IN_RC) || defined(IN_RING0)
466 if (pCur->CTX_SUFF(pfnHandler))
467 {
468 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
469# ifdef IN_RING0
470 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
471# else
472 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
473# endif
474 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
475 void *pvUser = pCur->CTX_SUFF(pvUser);
476
477 STAM_PROFILE_START(&pCur->Stat, h);
478 if (fLeaveLock)
479 pgmUnlock(pVM); /* @todo: Not entirely safe. */
480
481 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
482 if (fLeaveLock)
483 pgmLock(pVM);
484# ifdef VBOX_WITH_STATISTICS
485 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
486 if (pCur)
487 STAM_PROFILE_STOP(&pCur->Stat, h);
488# else
489 pCur = NULL; /* might be invalid by now. */
490# endif
491
492 }
493 else
494# endif
495 rc = VINF_EM_RAW_EMULATE_INSTR;
496
497 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
498 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
499 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
500 return rc;
501 }
502 }
503# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
504 else
505 {
506# ifdef PGM_SYNC_N_PAGES
507 /*
508 * If the region is write protected and we got a page not present fault, then sync
509 * the pages. If the fault was caused by a read, then restart the instruction.
510 * In case of write access continue to the GC write handler.
511 */
512 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
513 && !(uErr & X86_TRAP_PF_P))
514 {
515 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
516 if ( RT_FAILURE(rc)
517 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
518 || !(uErr & X86_TRAP_PF_RW))
519 {
520 AssertRC(rc);
521 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
522 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
523 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
524 return rc;
525 }
526 }
527# endif
528 /*
529 * Ok, it's a virtual page access handler.
530 *
531 * Since it's faster to search by address, we'll do that first
532 * and then retry by GCPhys if that fails.
533 */
534 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
535 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
536 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
537 */
538 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
539 if (pCur)
540 {
541 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
542 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
543 || !(uErr & X86_TRAP_PF_P)
544 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
545 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
546
547 if ( pvFault - pCur->Core.Key < pCur->cb
548 && ( uErr & X86_TRAP_PF_RW
549 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
550 {
551# ifdef IN_RC
552 STAM_PROFILE_START(&pCur->Stat, h);
553 pgmUnlock(pVM);
554 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
555 pgmLock(pVM);
556 STAM_PROFILE_STOP(&pCur->Stat, h);
557# else
558 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
559# endif
560 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
561 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
562 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
563 return rc;
564 }
565 /* Unhandled part of a monitored page */
566 }
567 else
568 {
569 /* Check by physical address. */
570 unsigned iPage;
571 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
572 &pCur, &iPage);
573 Assert(RT_SUCCESS(rc) || !pCur);
574 if ( pCur
575 && ( uErr & X86_TRAP_PF_RW
576 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
577 {
578 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
579# ifdef IN_RC
580 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
581 Assert(off < pCur->cb);
582 STAM_PROFILE_START(&pCur->Stat, h);
583 pgmUnlock(pVM);
584 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
585 pgmLock(pVM);
586 STAM_PROFILE_STOP(&pCur->Stat, h);
587# else
588 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
589# endif
590 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
591 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
592 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
593 return rc;
594 }
595 }
596 }
597# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
598
599 /*
600 * There is a handled area of the page, but this fault doesn't belong to it.
601 * We must emulate the instruction.
602 *
603 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
604 * we first check if this was a page-not-present fault for a page with only
605 * write access handlers. Restart the instruction if it wasn't a write access.
606 */
607 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
608
609 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
610 && !(uErr & X86_TRAP_PF_P))
611 {
612 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
613 if ( RT_FAILURE(rc)
614 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
615 || !(uErr & X86_TRAP_PF_RW))
616 {
617 AssertRC(rc);
618 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
619 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
620 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
621 return rc;
622 }
623 }
624
625 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
626 * It's writing to an unhandled part of the LDT page several million times.
627 */
628 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
629 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
630 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
631 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
632 return rc;
633 } /* if any kind of handler */
634
635# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
636 if (uErr & X86_TRAP_PF_P)
637 {
638 /*
639 * The page isn't marked, but it might still be monitored by a virtual page access handler.
640 * (ASSUMES no temporary disabling of virtual handlers.)
641 */
642 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
643 * we should correct both the shadow page table and physical memory flags, and not only check for
644 * accesses within the handler region but for access to pages with virtual handlers. */
645 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
646 if (pCur)
647 {
648 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
649 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
650 || !(uErr & X86_TRAP_PF_P)
651 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
652 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
653
654 if ( pvFault - pCur->Core.Key < pCur->cb
655 && ( uErr & X86_TRAP_PF_RW
656 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
657 {
658# ifdef IN_RC
659 STAM_PROFILE_START(&pCur->Stat, h);
660 pgmUnlock(pVM);
661 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
662 pgmLock(pVM);
663 STAM_PROFILE_STOP(&pCur->Stat, h);
664# else
665 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
666# endif
667 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
668 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
669 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
670 return rc;
671 }
672 }
673 }
674# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
675 }
676 else
677 {
678 /*
679 * When the guest accesses invalid physical memory (e.g. probing
680 * of RAM or accessing a remapped MMIO range), then we'll fall
681 * back to the recompiler to emulate the instruction.
682 */
683 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
684 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
685 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
686 return VINF_EM_RAW_EMULATE_INSTR;
687 }
688
689 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
690
691# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
692 /*
693 * We are here only if page is present in Guest page tables and
694 * trap is not handled by our handlers.
695 *
696 * Check it for page out-of-sync situation.
697 */
698 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
699
700 if (!(uErr & X86_TRAP_PF_P))
701 {
702 /*
703 * Page is not present in our page tables.
704 * Try to sync it!
705 * BTW, fPageShw is invalid in this branch!
706 */
707 if (uErr & X86_TRAP_PF_US)
708 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
709 else /* supervisor */
710 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
711
712 if (PGM_PAGE_IS_BALLOONED(pPage))
713 {
714 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
715 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
716 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
717 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
718 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
719 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
720 return rc;
721 }
722# if defined(LOG_ENABLED) && !defined(IN_RING0)
723 RTGCPHYS GCPhys2;
724 uint64_t fPageGst2;
725 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
726 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
727 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
728# endif /* LOG_ENABLED */
729
730# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
731 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
732 {
733 uint64_t fPageGst;
734 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
735 if ( RT_SUCCESS(rc)
736 && !(fPageGst & X86_PTE_US))
737 {
738 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
739 if ( pvFault == (RTGCPTR)pRegFrame->eip
740 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
741# ifdef CSAM_DETECT_NEW_CODE_PAGES
742 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
743 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
744# endif /* CSAM_DETECT_NEW_CODE_PAGES */
745 )
746 {
747 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
748 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
749 if (rc != VINF_SUCCESS)
750 {
751 /*
752 * CSAM needs to perform a job in ring 3.
753 *
754 * Sync the page before going to the host context; otherwise we'll end up in a loop if
755 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
756 */
757 LogFlow(("CSAM ring 3 job\n"));
758 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
759 AssertRC(rc2);
760
761 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
762 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
763 return rc;
764 }
765 }
766# ifdef CSAM_DETECT_NEW_CODE_PAGES
767 else if ( uErr == X86_TRAP_PF_RW
768 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
769 && pRegFrame->ecx < 0x10000)
770 {
771 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
772 * to detect loading of new code pages.
773 */
774
775 /*
776 * Decode the instruction.
777 */
778 RTGCPTR PC;
779 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
780 if (rc == VINF_SUCCESS)
781 {
782 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
783 uint32_t cbOp;
784 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
785
786 /* For now we'll restrict this to rep movsw/d instructions */
787 if ( rc == VINF_SUCCESS
788 && pDis->pCurInstr->opcode == OP_MOVSWD
789 && (pDis->prefix & PREFIX_REP))
790 {
791 CSAMMarkPossibleCodePage(pVM, pvFault);
792 }
793 }
794 }
795# endif /* CSAM_DETECT_NEW_CODE_PAGES */
796
797 /*
798 * Mark this page as safe.
799 */
800 /** @todo not correct for pages that contain both code and data!! */
801 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
802 CSAMMarkPage(pVM, pvFault, true);
803 }
804 }
805# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
806 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
807 if (RT_SUCCESS(rc))
808 {
809 /* The page was successfully synced, return to the guest. */
810 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
811 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
812 return VINF_SUCCESS;
813 }
814 }
815 else /* uErr & X86_TRAP_PF_P: */
816 {
817 /*
818 * Write protected pages are made writable when the guest makes the first
819 * write to them. This happens for pages that are shared, write monitored
820 * and not yet allocated.
821 *
822 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
823 * to physically monitored regions that are no longer valid.
824 * Assume for now it only applies to the read/write flag.
825 */
826 if ( RT_SUCCESS(rc)
827 && (uErr & X86_TRAP_PF_RW))
828 {
829 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
830 {
831 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
832 Assert(!PGM_PAGE_IS_ZERO(pPage));
833 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
834
835 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
836 if (rc != VINF_SUCCESS)
837 {
838 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
839 return rc;
840 }
841 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
842 return VINF_EM_NO_MEMORY;
843 }
844
845# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
846 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
847 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
848 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
849 {
850 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
851 uint64_t fPageGst;
852 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
853 if ( RT_SUCCESS(rc)
854 && !(fPageGst & X86_PTE_RW))
855 {
856 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
857 if (RT_SUCCESS(rc))
858 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
859 else
860 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
861 return rc;
862 }
863 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
864 }
865# endif
866 /// @todo count the above case; else
867 if (uErr & X86_TRAP_PF_US)
868 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
869 else /* supervisor */
870 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
871
872 /*
873 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
874 * page is not present, which is not true in this case.
875 */
876 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
877 if (RT_SUCCESS(rc))
878 {
879 /*
880 * Page was successfully synced, return to guest.
881 * First invalidate the page as it might be in the TLB.
882 */
883# if PGM_SHW_TYPE == PGM_TYPE_EPT
884 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
885# else
886 PGM_INVL_PG(pVCpu, pvFault);
887# endif
888# ifdef VBOX_STRICT
889 RTGCPHYS GCPhys2;
890 uint64_t fPageGst;
891 if (!HWACCMIsNestedPagingActive(pVM))
892 {
893 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
894 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
895 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
896 }
897 uint64_t fPageShw;
898 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
899 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
900# endif /* VBOX_STRICT */
901 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
902 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
903 return VINF_SUCCESS;
904 }
905 }
906
907# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
908# ifdef VBOX_STRICT
909 /*
910 * Check for VMM page flags vs. Guest page flags consistency.
911 * Currently only for debug purposes.
912 */
913 if (RT_SUCCESS(rc))
914 {
915 /* Get guest page flags. */
916 uint64_t fPageGst;
917 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
918 if (RT_SUCCESS(rc))
919 {
920 uint64_t fPageShw;
921 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
922
923 /*
924 * Compare page flags.
925 * Note: we have AVL, A, D bits desynched.
926 */
927 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
928 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
929 }
930 else
931 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
932 }
933 else
934 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
935# endif /* VBOX_STRICT */
936# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
937 }
938 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
939# endif /* PGM_OUT_OF_SYNC_IN_GC */
940 }
941 else /* GCPhys == NIL_RTGCPHYS */
942 {
943 /*
944 * Page not present in Guest OS or invalid page table address.
945 * This is potential virtual page access handler food.
946 *
947 * For the present we'll say that our access handlers don't
948 * work for this case - we've already discarded the page table
949 * not present case which is identical to this.
950 *
951 * When we perchance find we need this, we will probably have AVL
952 * trees (offset based) to operate on and we can measure their speed
953 * against mapping a page table and probably rearrange this handling
954 * a bit. (Like, searching virtual ranges before checking the
955 * physical address.)
956 */
957 }
958 }
959
960# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
961 /*
962 * Conclusion, this is a guest trap.
963 */
964 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
965 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
966 return VINF_EM_RAW_GUEST_TRAP;
967# else
968 /* present, but not a monitored page; perhaps the guest is probing physical memory */
969 return VINF_EM_RAW_EMULATE_INSTR;
970# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
971
972
973# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
974
975 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
976 return VERR_INTERNAL_ERROR;
977# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
978}
979#endif /* !IN_RING3 */
980
981
982/**
983 * Emulation of the invlpg instruction.
984 *
985 *
986 * @returns VBox status code.
987 *
988 * @param pVCpu The VMCPU handle.
989 * @param GCPtrPage Page to invalidate.
990 *
991 * @remark ASSUMES that the guest is updating before invalidating. This order
992 * isn't required by the CPU, so this is speculative and could cause
993 * trouble.
994 * @remark No TLB shootdown is done on any other VCPU as we assume that
995 * invlpg emulation is the *only* reason for calling this function.
996 * (The guest has to shoot down TLB entries on other CPUs itself)
997 * Currently true, but keep in mind!
998 *
999 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1000 */
1001PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1002{
1003#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1004 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1005 && PGM_SHW_TYPE != PGM_TYPE_EPT
1006 int rc;
1007 PVM pVM = pVCpu->CTX_SUFF(pVM);
1008 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1009
1010 Assert(PGMIsLockOwner(pVM));
1011
1012 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1013
1014# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1015 if (pPool->cDirtyPages)
1016 pgmPoolResetDirtyPages(pVM);
1017# endif
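 /* With the optimized dirty-PT handling, pending guest page-table writes are
    presumably flushed here first so that the shadow state examined below is
    consistent with the current guest tables. */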
1018
1019 /*
1020 * Get the shadow PD entry and skip out if this PD isn't present.
1021 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1022 */
1023# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1024 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1025 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1026
1027 /* Fetch the pgm pool shadow descriptor. */
1028 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1029 Assert(pShwPde);
1030
1031# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1032 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1033 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1034
1035 /* If the shadow PDPE isn't present, then skip the invalidate. */
1036 if (!pPdptDst->a[iPdpt].n.u1Present)
1037 {
1038 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1039 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1040 return VINF_SUCCESS;
1041 }
1042
1043 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1044 PPGMPOOLPAGE pShwPde = NULL;
1045 PX86PDPAE pPDDst;
1046
1047 /* Fetch the pgm pool shadow descriptor. */
1048 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1049 AssertRCSuccessReturn(rc, rc);
1050 Assert(pShwPde);
1051
1052 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1053 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1054
1055# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1056 /* PML4 */
1057 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1058 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1059 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1060 PX86PDPAE pPDDst;
1061 PX86PDPT pPdptDst;
1062 PX86PML4E pPml4eDst;
1063 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1064 if (rc != VINF_SUCCESS)
1065 {
1066 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1067 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1068 return VINF_SUCCESS;
1069 }
1070 Assert(pPDDst);
1071
1072 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1073 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1074
1075 if (!pPdpeDst->n.u1Present)
1076 {
1077 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1078 return VINF_SUCCESS;
1079 }
1080
1081 /* Fetch the pgm pool shadow descriptor. */
1082 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1083 Assert(pShwPde);
1084
1085# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1086
1087 const SHWPDE PdeDst = *pPdeDst;
1088 if (!PdeDst.n.u1Present)
1089 {
1090 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1091 return VINF_SUCCESS;
1092 }
1093
1094# if defined(IN_RC)
1095 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1096 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1097# endif
1098
1099 /*
1100 * Get the guest PD entry and calc big page.
1101 */
1102# if PGM_GST_TYPE == PGM_TYPE_32BIT
1103 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1104 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1105 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1106# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1107 unsigned iPDSrc = 0;
1108# if PGM_GST_TYPE == PGM_TYPE_PAE
1109 X86PDPE PdpeSrc;
1110 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1111# else /* AMD64 */
1112 PX86PML4E pPml4eSrc;
1113 X86PDPE PdpeSrc;
1114 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1115# endif
1116 GSTPDE PdeSrc;
1117
1118 if (pPDSrc)
1119 PdeSrc = pPDSrc->a[iPDSrc];
1120 else
1121 PdeSrc.u = 0;
1122# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1123
1124# if PGM_GST_TYPE == PGM_TYPE_32BIT
1125 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1126# else
1127 const bool fIsBigPage = PdeSrc.b.u1Size;
1128# endif
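 /* A 32-bit guest PDE only maps a big (4 MB) page when PSE is enabled, hence
    the CPUMIsGuestPageSizeExtEnabled check; PAE and AMD64 guests always honour
    the PS bit, giving 2 MB big pages. */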
1129
1130# ifdef IN_RING3
1131 /*
1132 * If a CR3 Sync is pending we may ignore the invalidate page operation
1133 * depending on the kind of sync and if it's a global page or not.
1134 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1135 */
1136# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1137 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1138 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1139 && fIsBigPage
1140 && PdeSrc.b.u1Global
1141 )
1142 )
1143# else
1144 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1145# endif
1146 {
1147 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1148 return VINF_SUCCESS;
1149 }
1150# endif /* IN_RING3 */
1151
1152 /*
1153 * Deal with the Guest PDE.
1154 */
1155 rc = VINF_SUCCESS;
1156 if (PdeSrc.n.u1Present)
1157 {
1158 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1159 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1160# ifndef PGM_WITHOUT_MAPPING
1161 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1162 {
1163 /*
1164 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1165 */
1166 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1167 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1168 pgmLock(pVM);
1169 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1170 pgmUnlock(pVM);
1171 }
1172 else
1173# endif /* !PGM_WITHOUT_MAPPING */
1174 if (!fIsBigPage)
1175 {
1176 /*
1177 * 4KB - page.
1178 */
1179 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1180 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1181
1182# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1183 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1184 if (pShwPage->cModifications)
1185 pShwPage->cModifications = 1;
1186# endif
1187
1188# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1189 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1190 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1191# endif
1192 if (pShwPage->GCPhys == GCPhys)
1193 {
1194# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1195 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1196 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1197 if (pPT->a[iPTEDst].n.u1Present)
1198 {
1199 /* This is very unlikely with caching/monitoring enabled. */
1200 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK, iPTEDst);
1201 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1202 }
1203# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1204 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1205 if (RT_SUCCESS(rc))
1206 rc = VINF_SUCCESS;
1207# endif
1208 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1209 PGM_INVL_PG(pVCpu, GCPtrPage);
1210 }
1211 else
1212 {
1213 /*
1214 * The page table address changed.
1215 */
1216 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1217 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1218 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1219 ASMAtomicWriteSize(pPdeDst, 0);
1220 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1221 PGM_INVL_VCPU_TLBS(pVCpu);
1222 }
1223 }
1224 else
1225 {
1226 /*
1227 * 2/4MB - page.
1228 */
1229 /* Before freeing the page, check if anything really changed. */
1230 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1231 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1232# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1233 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1234 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1235# endif
1236 if ( pShwPage->GCPhys == GCPhys
1237 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1238 {
1239 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1240 /** @todo PAT */
1241 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1242 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1243 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1244 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1245 {
1246 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1247 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1248# if defined(IN_RC)
1249 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1250 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1251# endif
1252 return VINF_SUCCESS;
1253 }
1254 }
1255
1256 /*
1257 * Ok, the page table is present and it's been changed in the guest.
1258 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1259 * We could do this for some flushes in GC too, but we need an algorithm for
1260 * deciding which 4MB pages contain code likely to be executed very soon.
1261 */
1262 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1263 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1264 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1265 ASMAtomicWriteSize(pPdeDst, 0);
1266 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1267 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1268 }
1269 }
1270 else
1271 {
1272 /*
1273 * Page directory is not present, mark shadow PDE not present.
1274 */
1275 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1276 {
1277 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1278 ASMAtomicWriteSize(pPdeDst, 0);
1279 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1280 PGM_INVL_PG(pVCpu, GCPtrPage);
1281 }
1282 else
1283 {
1284 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1285 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1286 }
1287 }
1288# if defined(IN_RC)
1289 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1290 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1291# endif
1292 return rc;
1293
1294#else /* guest real and protected mode */
1295 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1296 return VINF_SUCCESS;
1297#endif
1298}
1299
1300
1301/**
1302 * Update the tracking of shadowed pages.
1303 *
1304 * @param pVCpu The VMCPU handle.
1305 * @param pShwPage The shadow page.
1306 * @param HCPhys The physical page that is being dereferenced.
1307 * @param iPte Shadow PTE index
1308 */
1309DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte)
1310{
1311 PVM pVM = pVCpu->CTX_SUFF(pVM);
1312
1313 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1314 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1315
1316 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1317 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1318 * 2. write protect all shadowed pages. I.e. implement caching.
1319 */
1320 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1321
1322 /*
1323 * Find the guest address.
1324 */
1325 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1326 pRam;
1327 pRam = pRam->CTX_SUFF(pNext))
1328 {
1329 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1330 while (iPage-- > 0)
1331 {
1332 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1333 {
1334 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1335
1336 Assert(pShwPage->cPresent);
1337 Assert(pPool->cPresent);
1338 pShwPage->cPresent--;
1339 pPool->cPresent--;
1340
1341 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1342 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1343 return;
1344 }
1345 }
1346 }
1347
1348 for (;;)
1349 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1350}
1351
1352
1353/**
1354 * Update the tracking of shadowed pages.
1355 *
1356 * @param pVCpu The VMCPU handle.
1357 * @param pShwPage The shadow page.
1358 * @param u16 The top 16-bit of the pPage->HCPhys.
1359 * @param pPage Pointer to the guest page. This will be modified.
1360 * @param iPTDst The index into the shadow table.
1361 */
1362DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1363{
1364 PVM pVM = pVCpu->CTX_SUFF(pVM);
1365 /*
1366 * Just deal with the simple first time here.
1367 */
1368 if (!u16)
1369 {
1370 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1371 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1372 /* Save the page table index. */
1373 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1374 }
1375 else
1376 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1377
1378 /* write back */
1379 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1380 PGM_PAGE_SET_TRACKING(pPage, u16);
1381
1382 /* update statistics. */
1383 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1384 pShwPage->cPresent++;
1385 if (pShwPage->iFirstPresent > iPTDst)
1386 pShwPage->iFirstPresent = iPTDst;
1387}
1388
1389
1390/**
1391 * Creates a 4K shadow page for a guest page.
1392 *
1393 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1394 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1395 * will be mapped in this function.
1396 *
1397 * @param pVCpu The VMCPU handle.
1398 * @param pPteDst Destination page table entry.
1399 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1400 * Can safely assume that only the flags are being used.
1401 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1402 * @param pShwPage Pointer to the shadow page.
1403 * @param iPTDst The index into the shadow table.
1404 *
1405 * @remark Not used for 2/4MB pages!
1406 */
1407DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1408{
1409 if (PteSrc.n.u1Present)
1410 {
1411 PVM pVM = pVCpu->CTX_SUFF(pVM);
1412
1413# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1414 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1415 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1416 if (pShwPage->fDirty)
1417 {
1418 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1419 PX86PTPAE pGstPT;
1420
1421 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1422 pGstPT->a[iPTDst].u = PteSrc.u;
1423 }
1424# endif
1425 /*
1426 * Find the ram range.
1427 */
1428 PPGMPAGE pPage;
1429 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1430 if (RT_SUCCESS(rc))
1431 {
1432 /* Ignore ballooned pages. Don't return errors or use a fatal assert here as part of a shadow sync range might include ballooned pages. */
1433 if (PGM_PAGE_IS_BALLOONED(pPage))
1434 return;
1435
1436#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1437 /* Try to make the page writable if necessary. */
1438 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1439 && ( PGM_PAGE_IS_ZERO(pPage)
1440 || ( PteSrc.n.u1Write
1441 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1442# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1443 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1444# endif
1445 )
1446 )
1447 )
1448 {
1449 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1450 AssertRC(rc);
1451 }
1452#endif
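 /* Without the lazy allocation scheme the backing RAM page is made writable up
    front (zero/shared pages get a private copy) so the shadow PTE built below
    can map it directly; otherwise this is left to the write-fault path. */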
1453
1454 /** @todo investigate PWT, PCD and PAT. */
1455 /*
1456 * Make page table entry.
1457 */
1458 SHWPTE PteDst;
1459 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1460 {
1461 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1462 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1463 {
1464#if PGM_SHW_TYPE == PGM_TYPE_EPT
1465 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1466 PteDst.n.u1Present = 1;
1467 PteDst.n.u1Execute = 1;
1468 PteDst.n.u1IgnorePAT = 1;
1469 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1470 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1471#else
1472 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1473 | PGM_PAGE_GET_HCPHYS(pPage);
1474#endif
1475 }
1476 else
1477 {
1478 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1479 PteDst.u = 0;
1480 }
1481 /** @todo count these two kinds. */
1482 }
1483 else
1484 {
1485#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1486 /*
1487 * If the page or page directory entry is not marked accessed,
1488 * we mark the page not present.
1489 */
1490 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1491 {
1492 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1493 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1494 PteDst.u = 0;
1495 }
1496 else
1497 /*
1498 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1499 * when the page is modified.
1500 */
1501 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1502 {
1503 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1504 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1505 | PGM_PAGE_GET_HCPHYS(pPage)
1506 | PGM_PTFLAGS_TRACK_DIRTY;
1507 }
1508 else
1509#endif
1510 {
1511 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1512#if PGM_SHW_TYPE == PGM_TYPE_EPT
1513 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1514 PteDst.n.u1Present = 1;
1515 PteDst.n.u1Write = 1;
1516 PteDst.n.u1Execute = 1;
1517 PteDst.n.u1IgnorePAT = 1;
1518 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1519 /* PteDst.n.u1Size = 0 */
1520#else
1521 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1522 | PGM_PAGE_GET_HCPHYS(pPage);
1523#endif
1524 }
1525 }
1526
1527 /*
1528 * Make sure only allocated pages are mapped writable.
1529 */
1530 if ( PteDst.n.u1Write
1531 && PteDst.n.u1Present
1532 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1533 {
1534 /* Still applies to shared pages. */
1535 Assert(!PGM_PAGE_IS_ZERO(pPage));
1536 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1537 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1538 }
1539
1540 /*
1541 * Keep user track up to date.
1542 */
1543 if (PteDst.n.u1Present)
1544 {
1545 if (!pPteDst->n.u1Present)
1546 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1547 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1548 {
1549 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1550 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1551 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1552 }
1553 }
1554 else if (pPteDst->n.u1Present)
1555 {
1556 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1557 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1558 }
1559
1560 /*
1561 * Update statistics and commit the entry.
1562 */
1563#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1564 if (!PteSrc.n.u1Global)
1565 pShwPage->fSeenNonGlobal = true;
1566#endif
1567 ASMAtomicWriteSize(pPteDst, PteDst.u);
1568 }
1569 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1570 /** @todo count these. */
1571 }
1572 else
1573 {
1574 /*
1575 * Page not-present.
1576 */
1577 Log2(("SyncPageWorker: page not present in Pte\n"));
1578 /* Keep user track up to date. */
1579 if (pPteDst->n.u1Present)
1580 {
1581 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1582 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1583 }
1584 ASMAtomicWriteSize(pPteDst, 0);
1585 /** @todo count these. */
1586 }
1587}
1588
1589
1590/**
1591 * Syncs a guest OS page.
1592 *
1593 * There are no conflicts at this point, neither is there any need for
1594 * page table allocations.
1595 *
1596 * @returns VBox status code.
1597 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1598 * @param pVCpu The VMCPU handle.
1599 * @param PdeSrc Page directory entry of the guest.
1600 * @param GCPtrPage Guest context page address.
1601 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1602 * @param uErr Fault error (X86_TRAP_PF_*).
1603 */
1604PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1605{
1606 PVM pVM = pVCpu->CTX_SUFF(pVM);
1607 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1608 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1609
1610 Assert(PGMIsLockOwner(pVM));
1611
1612#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1613 || PGM_GST_TYPE == PGM_TYPE_PAE \
1614 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1615 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1616 && PGM_SHW_TYPE != PGM_TYPE_EPT
1617
1618 /*
1619 * Assert preconditions.
1620 */
1621 Assert(PdeSrc.n.u1Present);
1622 Assert(cPages);
1623# if 0 /* rarely useful; leave for debugging. */
1624 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1625# endif
1626
1627 /*
1628 * Get the shadow PDE, find the shadow page table in the pool.
1629 */
1630# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1631 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1632 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1633
1634 /* Fetch the pgm pool shadow descriptor. */
1635 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1636 Assert(pShwPde);
1637
1638# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1639 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1640 PPGMPOOLPAGE pShwPde = NULL;
1641 PX86PDPAE pPDDst;
1642
1643 /* Fetch the pgm pool shadow descriptor. */
1644 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1645 AssertRCSuccessReturn(rc2, rc2);
1646 Assert(pShwPde);
1647
1648 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1649 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1650
1651# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1652 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1653 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1654 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1655 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1656
1657 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1658 AssertRCSuccessReturn(rc2, rc2);
1659 Assert(pPDDst && pPdptDst);
1660 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1661# endif
1662 SHWPDE PdeDst = *pPdeDst;
1663
1664 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1665 if (!PdeDst.n.u1Present)
1666 {
1667 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1668 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1669 return VINF_SUCCESS; /* force the instruction to be executed again. */
1670 }
1671
1672 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1673 Assert(pShwPage);
1674
1675# if PGM_GST_TYPE == PGM_TYPE_AMD64
1676 /* Fetch the pgm pool shadow descriptor. */
1677 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1678 Assert(pShwPde);
1679# endif
1680
1681# if defined(IN_RC)
1682 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1683 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1684# endif
1685
1686 /*
1687 * Check that the page is present and that the shadow PDE isn't out of sync.
1688 */
1689# if PGM_GST_TYPE == PGM_TYPE_32BIT
1690 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1691# else
1692 const bool fBigPage = PdeSrc.b.u1Size;
1693# endif
1694 RTGCPHYS GCPhys;
1695 if (!fBigPage)
1696 {
1697 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1698# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1699 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
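                  /* Example: a 32-bit guest PT has 1024 entries covering 4 MB, while each PAE
                     shadow PT has 512 entries covering 2 MB, so the odd shadow PD entry maps the
                     upper half (PAGE_SIZE/2 = 2 KB) of the guest PT. */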
1700 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1701# endif
1702 }
1703 else
1704 {
1705 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1706# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1707 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1708 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1709# endif
1710 }
1711 if ( pShwPage->GCPhys == GCPhys
1712 && PdeSrc.n.u1Present
1713 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1714 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1715# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1716 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1717# endif
1718 )
1719 {
1720 /*
1721 * Check that the PDE is marked accessed already.
1722 * Since we set the accessed bit *before* getting here on a #PF, this
1723 * check is only meant for dealing with non-#PF'ing paths.
1724 */
1725 if (PdeSrc.n.u1Accessed)
1726 {
1727 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1728 if (!fBigPage)
1729 {
1730 /*
1731 * 4KB Page - Map the guest page table.
1732 */
1733 PGSTPT pPTSrc;
1734 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1735 if (RT_SUCCESS(rc))
1736 {
1737# ifdef PGM_SYNC_N_PAGES
1738 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1739 if ( cPages > 1
1740 && !(uErr & X86_TRAP_PF_P)
1741 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1742 {
1743 /*
1744 * This code path is currently only taken when the caller is PGMTrap0eHandler
1745 * for non-present pages!
1746 *
1747 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1748 * deal with locality.
1749 */
1750 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1751# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1752 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1753 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1754# else
1755 const unsigned offPTSrc = 0;
1756# endif
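                     /* Center a window of PGM_SYNC_NR_PAGES entries on the faulting one and
                        clip it to the bounds of the shadow page table. */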
1757 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1758 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1759 iPTDst = 0;
1760 else
1761 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1762 for (; iPTDst < iPTDstEnd; iPTDst++)
1763 {
1764 if (!pPTDst->a[iPTDst].n.u1Present)
1765 {
1766 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1767 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1768 NOREF(GCPtrCurPage);
1769#ifndef IN_RING0
1770 /*
1771 * Assuming kernel code will be marked as supervisor (and not as user level code
1772 * executed using a conforming code selector) and as read-only.
1773 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1774 */
1775 PPGMPAGE pPage;
1776 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1777 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1778 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1779 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1780 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1781 )
1782#endif /* else: CSAM not active */
1783 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1784 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1785 GCPtrCurPage, PteSrc.n.u1Present,
1786 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1787 PteSrc.n.u1User & PdeSrc.n.u1User,
1788 (uint64_t)PteSrc.u,
1789 (uint64_t)pPTDst->a[iPTDst].u,
1790 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1791 }
1792 }
1793 }
1794 else
1795# endif /* PGM_SYNC_N_PAGES */
1796 {
1797 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1798 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1799 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1800 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1801 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1802 GCPtrPage, PteSrc.n.u1Present,
1803 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1804 PteSrc.n.u1User & PdeSrc.n.u1User,
1805 (uint64_t)PteSrc.u,
1806 (uint64_t)pPTDst->a[iPTDst].u,
1807 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1808 }
1809 }
1810 else /* MMIO or invalid page: emulated in #PF handler. */
1811 {
1812 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1813 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1814 }
1815 }
1816 else
1817 {
1818 /*
1819 * 4/2MB page - lazy syncing shadow 4K pages.
1820 * (There are many causes of getting here; it's no longer only CSAM.)
1821 */
1822 /* Calculate the GC physical address of this 4KB shadow page. */
1823 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1824 /* Find ram range. */
1825 PPGMPAGE pPage;
1826 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1827 if (RT_SUCCESS(rc))
1828 {
1829 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1830
1831# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1832 /* Try to make the page writable if necessary. */
1833 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1834 && ( PGM_PAGE_IS_ZERO(pPage)
1835 || ( PdeSrc.n.u1Write
1836 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1837# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1838 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1839# endif
1840 )
1841 )
1842 )
1843 {
1844 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1845 AssertRC(rc);
1846 }
1847# endif
1848
1849 /*
1850 * Make shadow PTE entry.
1851 */
1852 SHWPTE PteDst;
1853 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1854 | PGM_PAGE_GET_HCPHYS(pPage);
1855 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1856 {
1857 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1858 PteDst.n.u1Write = 0;
1859 else
1860 PteDst.u = 0;
1861 }
1862
1863 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1864 if ( PteDst.n.u1Present
1865 && !pPTDst->a[iPTDst].n.u1Present)
1866 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1867
1868 /* Make sure only allocated pages are mapped writable. */
1869 if ( PteDst.n.u1Write
1870 && PteDst.n.u1Present
1871 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1872 {
1873 /* Still applies to shared pages. */
1874 Assert(!PGM_PAGE_IS_ZERO(pPage));
1875 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1876 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1877 }
1878
1879 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1880
1881 /*
1882 * If the page is not flagged as dirty and is writable, then make it read-only
1883 * at PD level, so we can set the dirty bit when the page is modified.
1884 *
1885 * ASSUMES that page access handlers are implemented on page table entry level.
1886 * Thus we will first catch the dirty access and set PDE.D and restart. If
1887 * there is an access handler, we'll trap again and let it work on the problem.
1888 */
1889 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1890 * As for invlpg, it simply frees the whole shadow PT.
1891 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1892 if ( !PdeSrc.b.u1Dirty
1893 && PdeSrc.b.u1Write)
1894 {
1895 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1896 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1897 PdeDst.n.u1Write = 0;
1898 }
1899 else
1900 {
1901 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1902 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1903 }
1904 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1905 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1906 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1907 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1908 }
1909 else
1910 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1911 }
1912# if defined(IN_RC)
1913 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1914 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1915# endif
1916 return VINF_SUCCESS;
1917 }
1918 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1919 }
1920 else
1921 {
1922 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1923 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1924 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1925 }
1926
1927 /*
1928 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1929 * Yea, I'm lazy.
1930 */
1931 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1932 ASMAtomicWriteSize(pPdeDst, 0);
1933
1934# if defined(IN_RC)
1935 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1936 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1937# endif
1938 PGM_INVL_VCPU_TLBS(pVCpu);
1939 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1940
1941#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1942 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1943 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1944 && !defined(IN_RC)
1945
1946# ifdef PGM_SYNC_N_PAGES
1947 /*
1948 * Get the shadow PDE, find the shadow page table in the pool.
1949 */
1950# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1951 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1952
1953# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1954 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1955
1956# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1957 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1958 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1959 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1960 X86PDEPAE PdeDst;
1961 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1962
1963 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1964 AssertRCSuccessReturn(rc, rc);
1965 Assert(pPDDst && pPdptDst);
1966 PdeDst = pPDDst->a[iPDDst];
1967# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1968 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1969 PEPTPD pPDDst;
1970 EPTPDE PdeDst;
1971
1972 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1973 if (rc != VINF_SUCCESS)
1974 {
1975 AssertRC(rc);
1976 return rc;
1977 }
1978 Assert(pPDDst);
1979 PdeDst = pPDDst->a[iPDDst];
1980# endif
1981 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1982 if (!PdeDst.n.u1Present)
1983 {
1984 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
1985 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1986 return VINF_SUCCESS; /* force the instruction to be executed again. */
1987 }
1988
1989 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
1990 if (PdeDst.n.u1Size)
1991 {
1992 Assert(HWACCMIsNestedPagingActive(pVM));
1993 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
1994 return VINF_SUCCESS;
1995 }
1996
1997 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1998 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1999
2000 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2001 if ( cPages > 1
2002 && !(uErr & X86_TRAP_PF_P)
2003 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2004 {
2005 /*
2006 * This code path is currently only taken when the caller is PGMTrap0eHandler
2007 * for non-present pages!
2008 *
2009 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2010 * deal with locality.
2011 */
2012 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2013 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2014 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2015 iPTDst = 0;
2016 else
2017 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2018 for (; iPTDst < iPTDstEnd; iPTDst++)
2019 {
2020 if (!pPTDst->a[iPTDst].n.u1Present)
2021 {
2022 GSTPTE PteSrc;
2023
2024 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2025
2026 /* Fake the page table entry */
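                 /* No guest paging in this mode, so guest physical == guest virtual; synthesize a
                    present, writable, accessed and dirty entry and let SyncPageWorker look up the
                    corresponding host physical page. */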
2027 PteSrc.u = GCPtrCurPage;
2028 PteSrc.n.u1Present = 1;
2029 PteSrc.n.u1Dirty = 1;
2030 PteSrc.n.u1Accessed = 1;
2031 PteSrc.n.u1Write = 1;
2032 PteSrc.n.u1User = 1;
2033
2034 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2035
2036 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2037 GCPtrCurPage, PteSrc.n.u1Present,
2038 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2039 PteSrc.n.u1User & PdeSrc.n.u1User,
2040 (uint64_t)PteSrc.u,
2041 (uint64_t)pPTDst->a[iPTDst].u,
2042 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2043
2044 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2045 break;
2046 }
2047 else
2048 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2049 }
2050 }
2051 else
2052# endif /* PGM_SYNC_N_PAGES */
2053 {
2054 GSTPTE PteSrc;
2055 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2056 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2057
2058 /* Fake the page table entry */
2059 PteSrc.u = GCPtrCurPage;
2060 PteSrc.n.u1Present = 1;
2061 PteSrc.n.u1Dirty = 1;
2062 PteSrc.n.u1Accessed = 1;
2063 PteSrc.n.u1Write = 1;
2064 PteSrc.n.u1User = 1;
2065 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2066
2067 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2068 GCPtrPage, PteSrc.n.u1Present,
2069 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2070 PteSrc.n.u1User & PdeSrc.n.u1User,
2071 (uint64_t)PteSrc.u,
2072 (uint64_t)pPTDst->a[iPTDst].u,
2073 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2074 }
2075 return VINF_SUCCESS;
2076
2077#else
2078 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2079 return VERR_INTERNAL_ERROR;
2080#endif
2081}
2082
2083
2084#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2085/**
2086 * Investigate page fault and handle write protection page faults caused by
2087 * dirty bit tracking.
2088 *
2089 * @returns VBox status code.
2090 * @param pVCpu The VMCPU handle.
2091 * @param uErr Page fault error code.
2092 * @param pPdeSrc Guest page directory entry.
2093 * @param GCPtrPage Guest context page address.
2094 */
2095PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2096{
2097 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2098 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
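    /* A supervisor-mode write to a read-only page only faults when CR0.WP is set, so a write
       fault can only be a protection fault if it originated in user mode or WP is enabled. */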
2099 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2100# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2101 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2102# endif
2103 unsigned uPageFaultLevel;
2104 int rc;
2105 PVM pVM = pVCpu->CTX_SUFF(pVM);
2106
2107 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2108
2109# if PGM_GST_TYPE == PGM_TYPE_PAE \
2110 || PGM_GST_TYPE == PGM_TYPE_AMD64
2111
2112# if PGM_GST_TYPE == PGM_TYPE_AMD64
2113 PX86PML4E pPml4eSrc;
2114 PX86PDPE pPdpeSrc;
2115
2116 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2117 Assert(pPml4eSrc);
2118
2119 /*
2120 * Real page fault? (PML4E level)
2121 */
2122 if ( (uErr & X86_TRAP_PF_RSVD)
2123 || !pPml4eSrc->n.u1Present
2124 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2125 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2126 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2127 )
2128 {
2129 uPageFaultLevel = 0;
2130 goto l_UpperLevelPageFault;
2131 }
2132 Assert(pPdpeSrc);
2133
2134# else /* PAE */
2135 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2136# endif /* PAE */
2137
2138 /*
2139 * Real page fault? (PDPE level)
2140 */
2141 if ( (uErr & X86_TRAP_PF_RSVD)
2142 || !pPdpeSrc->n.u1Present
2143# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2144 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2145 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2146 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2147# endif
2148 )
2149 {
2150 uPageFaultLevel = 1;
2151 goto l_UpperLevelPageFault;
2152 }
2153# endif
2154
2155 /*
2156 * Real page fault? (PDE level)
2157 */
2158 if ( (uErr & X86_TRAP_PF_RSVD)
2159 || !pPdeSrc->n.u1Present
2160 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2161# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2162 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2163# endif
2164 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2165 {
2166 uPageFaultLevel = 2;
2167 goto l_UpperLevelPageFault;
2168 }
2169
2170 /*
2171 * First check the easy case where the page directory has been marked read-only to track
2172 * the dirty bit of an emulated BIG page.
2173 */
2174 if ( pPdeSrc->b.u1Size
2175# if PGM_GST_TYPE == PGM_TYPE_32BIT
2176 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2177# endif
2178 )
2179 {
2180 /* Mark guest page directory as accessed */
2181# if PGM_GST_TYPE == PGM_TYPE_AMD64
2182 pPml4eSrc->n.u1Accessed = 1;
2183 pPdpeSrc->lm.u1Accessed = 1;
2184# endif
2185 pPdeSrc->b.u1Accessed = 1;
2186
2187 /*
2188 * Only write protection page faults are relevant here.
2189 */
2190 if (fWriteFault)
2191 {
2192 /* Mark guest page directory as dirty (BIG page only). */
2193 pPdeSrc->b.u1Dirty = 1;
2194 }
2195 return VINF_SUCCESS;
2196 }
2197 /* else: 4KB page table */
2198
2199 /*
2200 * Map the guest page table.
2201 */
2202 PGSTPT pPTSrc;
2203 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2204 if (RT_SUCCESS(rc))
2205 {
2206 /*
2207 * Real page fault?
2208 */
2209 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2210 const GSTPTE PteSrc = *pPteSrc;
2211 if ( !PteSrc.n.u1Present
2212 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2213# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2214 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2215# endif
2216 || (fUserLevelFault && !PteSrc.n.u1User)
2217 )
2218 {
2219 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2220 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2221
2222 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2223 * See the 2nd case above as well.
2224 */
2225 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2226 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2227
2228 return VINF_EM_RAW_GUEST_TRAP;
2229 }
2230 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2231
2232 /*
2233 * Set the accessed bits in the page directory and the page table.
2234 */
2235# if PGM_GST_TYPE == PGM_TYPE_AMD64
2236 pPml4eSrc->n.u1Accessed = 1;
2237 pPdpeSrc->lm.u1Accessed = 1;
2238# endif
2239 pPdeSrc->n.u1Accessed = 1;
2240 pPteSrc->n.u1Accessed = 1;
2241
2242 /*
2243 * Only write protection page faults are relevant here.
2244 */
2245 if (fWriteFault)
2246 {
2247 /* Write access, so mark guest entry as dirty. */
2248# ifdef VBOX_WITH_STATISTICS
2249 if (!pPteSrc->n.u1Dirty)
2250 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2251 else
2252 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2253# endif
2254
2255 pPteSrc->n.u1Dirty = 1;
2256 }
2257 return VINF_SUCCESS;
2258 }
2259 AssertRC(rc);
2260 return rc;
2261
2262
2263l_UpperLevelPageFault:
2264 /*
2265 * Pagefault detected while checking the PML4E, PDPE or PDE.
2266 * Single exit handler to get rid of duplicate code paths.
2267 */
2268 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2269 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2270
2271 if ( 1
2272# if PGM_GST_TYPE == PGM_TYPE_AMD64
2273 && pPml4eSrc->n.u1Present
2274# endif
2275# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2276 && pPdpeSrc->n.u1Present
2277# endif
2278 && pPdeSrc->n.u1Present)
2279 {
2280 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2281 if ( pPdeSrc->b.u1Size
2282# if PGM_GST_TYPE == PGM_TYPE_32BIT
2283 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2284# endif
2285 )
2286 {
2287 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2288 }
2289 else
2290 {
2291 /*
2292 * Map the guest page table.
2293 */
2294 PGSTPT pPTSrc2;
2295 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2296 if (RT_SUCCESS(rc))
2297 {
2298 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2299 if (pPteSrc->n.u1Present)
2300 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2301 }
2302 AssertRC(rc);
2303 }
2304 }
2305 return VINF_EM_RAW_GUEST_TRAP;
2306}
2307
2308/**
2309 * Handle dirty bit tracking faults.
2310 *
2311 * @returns VBox status code.
2312 * @param pVCpu The VMCPU handle.
2313 * @param uErr Page fault error code.
2314 * @param pPdeDst Shadow page directory entry.
2315 * @param pPdeSrc Guest page directory entry.
2316 * @param GCPtrPage Guest context page address.
2317 */
2318PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2319{
2320# if PGM_GST_TYPE == PGM_TYPE_32BIT
2321 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2322# else
2323 const bool fBigPagesSupported = true;
2324# endif
2325 PVM pVM = pVCpu->CTX_SUFF(pVM);
2326 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2327
2328 Assert(PGMIsLockOwner(pVM));
2329
2330 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2331 {
2332 if ( pPdeDst->n.u1Present
2333 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2334 {
2335 SHWPDE PdeDst = *pPdeDst;
2336
2337 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2338 Assert(pPdeSrc->b.u1Write);
2339
2340 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2341 * fault again and take this path to only invalidate the entry.
2342 */
2343 PdeDst.n.u1Write = 1;
2344 PdeDst.n.u1Accessed = 1;
2345 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2346 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2347 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2348 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2349 }
2350# ifdef IN_RING0
2351 else
2352 /* Check for stale TLB entry; only applies to the SMP guest case. */
2353 if ( pVM->cCpus > 1
2354 && pPdeDst->n.u1Write
2355 && pPdeDst->n.u1Accessed)
2356 {
2357 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2358 if (pShwPage)
2359 {
2360 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2361 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2362 if ( pPteDst->n.u1Present
2363 && pPteDst->n.u1Write)
2364 {
2365 /* Stale TLB entry. */
2366 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2367 PGM_INVL_PG(pVCpu, GCPtrPage);
2368 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2369 }
2370 }
2371 }
2372# endif /* IN_RING0 */
2373 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2374 }
2375
2376 /*
2377 * Map the guest page table.
2378 */
2379 PGSTPT pPTSrc;
2380 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2381 if (RT_SUCCESS(rc))
2382 {
2383 if (pPdeDst->n.u1Present)
2384 {
2385 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2386 const GSTPTE PteSrc = *pPteSrc;
2387#ifndef IN_RING0
2388 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2389 * Our individual shadow handlers will provide more information and force a fatal exit.
2390 */
2391 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2392 {
2393 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2394 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2395 }
2396#endif
2397 /*
2398 * Map shadow page table.
2399 */
2400 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2401 if (pShwPage)
2402 {
2403 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2404 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2405 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2406 {
2407 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2408 {
2409 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2410 SHWPTE PteDst = *pPteDst;
2411
2412 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2413 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2414
2415 Assert(pPteSrc->n.u1Write);
2416
2417 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2418 * fault again and take this path to only invalidate the entry.
2419 */
2420 if (RT_LIKELY(pPage))
2421 {
2422 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2423 {
2424 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2425 PteDst.n.u1Write = 0;
2426 }
2427 else
2428 {
2429 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2430 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2431 {
2432 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2433 AssertRC(rc);
2434 }
2435 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2436 {
2437 PteDst.n.u1Write = 1;
2438 }
2439 else
2440 {
2441 /* Still applies to shared pages. */
2442 Assert(!PGM_PAGE_IS_ZERO(pPage));
2443 PteDst.n.u1Write = 0;
2444 }
2445 }
2446 }
2447 else
2448 PteDst.n.u1Write = 1;
2449
2450 PteDst.n.u1Dirty = 1;
2451 PteDst.n.u1Accessed = 1;
2452 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2453 ASMAtomicWriteSize(pPteDst, PteDst.u);
2454 PGM_INVL_PG(pVCpu, GCPtrPage);
2455 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2456 }
2457# ifdef IN_RING0
2458 else
2459 /* Check for stale TLB entry; only applies to the SMP guest case. */
2460 if ( pVM->cCpus > 1
2461 && pPteDst->n.u1Write == 1
2462 && pPteDst->n.u1Accessed == 1)
2463 {
2464 /* Stale TLB entry. */
2465 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2466 PGM_INVL_PG(pVCpu, GCPtrPage);
2467 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2468 }
2469# endif
2470 }
2471 }
2472 else
2473 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2474 }
2475 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2476 }
2477 AssertRC(rc);
2478 return rc;
2479}
2480#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2481
2482
2483/**
2484 * Sync a shadow page table.
2485 *
2486 * The shadow page table is not present. This includes the case where
2487 * there is a conflict with a mapping.
2488 *
2489 * @returns VBox status code.
2490 * @param pVCpu The VMCPU handle.
2491 * @param iPDSrc Page directory index.
2492 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2493 * Assume this is a temporary mapping.
2494 * @param GCPtrPage GC Pointer of the page that caused the fault
2495 */
2496PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2497{
2498 PVM pVM = pVCpu->CTX_SUFF(pVM);
2499 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2500
2501 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2502#if 0 /* rarely useful; leave for debugging. */
2503 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2504#endif
2505 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2506
2507 Assert(PGMIsLocked(pVM));
2508
2509#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2510 || PGM_GST_TYPE == PGM_TYPE_PAE \
2511 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2512 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2513 && PGM_SHW_TYPE != PGM_TYPE_EPT
2514
2515 int rc = VINF_SUCCESS;
2516
2517 /*
2518 * Validate input a little bit.
2519 */
2520 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2521# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2522 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2523 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2524
2525 /* Fetch the pgm pool shadow descriptor. */
2526 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2527 Assert(pShwPde);
2528
2529# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2530 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2531 PPGMPOOLPAGE pShwPde = NULL;
2532 PX86PDPAE pPDDst;
2533 PSHWPDE pPdeDst;
2534
2535 /* Fetch the pgm pool shadow descriptor. */
2536 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2537 AssertRCSuccessReturn(rc, rc);
2538 Assert(pShwPde);
2539
2540 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2541 pPdeDst = &pPDDst->a[iPDDst];
2542
2543# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2544 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2545 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2546 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2547 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2548 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2549 AssertRCSuccessReturn(rc, rc);
2550 Assert(pPDDst);
2551 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2552# endif
2553 SHWPDE PdeDst = *pPdeDst;
2554
2555# if PGM_GST_TYPE == PGM_TYPE_AMD64
2556 /* Fetch the pgm pool shadow descriptor. */
2557 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2558 Assert(pShwPde);
2559# endif
2560
2561# ifndef PGM_WITHOUT_MAPPINGS
2562 /*
2563 * Check for conflicts.
2564 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2565 * HC: Simply resolve the conflict.
2566 */
2567 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2568 {
2569 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2570# ifndef IN_RING3
2571 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2572 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2573 return VERR_ADDRESS_CONFLICT;
2574# else
2575 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2576 Assert(pMapping);
2577# if PGM_GST_TYPE == PGM_TYPE_32BIT
2578 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2579# elif PGM_GST_TYPE == PGM_TYPE_PAE
2580 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2581# else
2582 AssertFailed(); /* can't happen for amd64 */
2583# endif
2584 if (RT_FAILURE(rc))
2585 {
2586 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2587 return rc;
2588 }
2589 PdeDst = *pPdeDst;
2590# endif
2591 }
2592# endif /* !PGM_WITHOUT_MAPPINGS */
2593 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2594
2595# if defined(IN_RC)
2596 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2597 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2598# endif
2599
2600 /*
2601 * Sync page directory entry.
2602 */
2603 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2604 if (PdeSrc.n.u1Present)
2605 {
2606 /*
2607 * Allocate & map the page table.
2608 */
2609 PSHWPT pPTDst;
2610# if PGM_GST_TYPE == PGM_TYPE_32BIT
2611 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2612# else
2613 const bool fPageTable = !PdeSrc.b.u1Size;
2614# endif
2615 PPGMPOOLPAGE pShwPage;
2616 RTGCPHYS GCPhys;
2617 if (fPageTable)
2618 {
2619 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2620# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2621 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2622 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2623# endif
2624 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2625 }
2626 else
2627 {
2628 PGMPOOLACCESS enmAccess;
2629# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2630 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2631# else
2632 const bool fNoExecute = false;
2633# endif
2634
2635 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2636# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2637 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2638 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2639# endif
2640 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2641 if (PdeSrc.n.u1User)
2642 {
2643 if (PdeSrc.n.u1Write)
2644 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2645 else
2646 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2647 }
2648 else
2649 {
2650 if (PdeSrc.n.u1Write)
2651 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2652 else
2653 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2654 }
2655 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2656 }
2657 if (rc == VINF_SUCCESS)
2658 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2659 else if (rc == VINF_PGM_CACHED_PAGE)
2660 {
2661 /*
2662 * The PT was cached, just hook it up.
2663 */
2664 if (fPageTable)
2665 PdeDst.u = pShwPage->Core.Key
2666 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2667 else
2668 {
2669 PdeDst.u = pShwPage->Core.Key
2670 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2671 /* (see explanation and assumptions further down.) */
2672 if ( !PdeSrc.b.u1Dirty
2673 && PdeSrc.b.u1Write)
2674 {
2675 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2676 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2677 PdeDst.b.u1Write = 0;
2678 }
2679 }
2680 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2681# if defined(IN_RC)
2682 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2683# endif
2684 return VINF_SUCCESS;
2685 }
2686 else if (rc == VERR_PGM_POOL_FLUSHED)
2687 {
2688 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2689# if defined(IN_RC)
2690 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2691# endif
2692 return VINF_PGM_SYNC_CR3;
2693 }
2694 else
2695 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
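    /* Hook the newly allocated shadow page table into the PDE: keep only the AVL bits and insert
       the pool page's host physical address; the attributes are merged in from the guest PDE below. */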
2696 PdeDst.u &= X86_PDE_AVL_MASK;
2697 PdeDst.u |= pShwPage->Core.Key;
2698
2699 /*
2700 * Page directory has been accessed (this is a fault situation, remember).
2701 */
2702 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2703 if (fPageTable)
2704 {
2705 /*
2706 * Page table - 4KB.
2707 *
2708 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2709 */
2710 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2711 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2712 PGSTPT pPTSrc;
2713 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2714 if (RT_SUCCESS(rc))
2715 {
2716 /*
2717 * Start by syncing the page directory entry so CSAM's TLB trick works.
2718 */
2719 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2720 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2721 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2722# if defined(IN_RC)
2723 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2724# endif
2725
2726 /*
2727 * Directory/page user or supervisor privilege: (same goes for read/write)
2728 *
2729 * Directory    Page    Combined
2730 *    U/S        U/S       U/S
2731 *     0          0         0
2732 *     0          1         0
2733 *     1          0         0
2734 *     1          1         1
2735 *
2736 * Simple AND operation. Table listed for completeness.
2737 *
2738 */
2739 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2740# ifdef PGM_SYNC_N_PAGES
2741 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2742 unsigned iPTDst = iPTBase;
2743 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2744 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2745 iPTDst = 0;
2746 else
2747 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2748# else /* !PGM_SYNC_N_PAGES */
2749 unsigned iPTDst = 0;
2750 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2751# endif /* !PGM_SYNC_N_PAGES */
2752# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2753 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2754 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2755# else
2756 const unsigned offPTSrc = 0;
2757# endif
2758 for (; iPTDst < iPTDstEnd; iPTDst++)
2759 {
2760 const unsigned iPTSrc = iPTDst + offPTSrc;
2761 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2762
2763 if (PteSrc.n.u1Present) /* we've already cleared it above */
2764 {
2765# ifndef IN_RING0
2766 /*
2767 * Assuming kernel code will be marked as supervisor (and not as user level code
2768 * executed using a conforming code selector) and as read-only.
2769 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2770 */
2771 PPGMPAGE pPage;
2772 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2773 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2774 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2775 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2776 )
2777# endif
2778 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2779 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2780 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2781 PteSrc.n.u1Present,
2782 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2783 PteSrc.n.u1User & PdeSrc.n.u1User,
2784 (uint64_t)PteSrc.u,
2785 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2786 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2787 }
2788 } /* for PTEs */
2789 }
2790 }
2791 else
2792 {
2793 /*
2794 * Big page - 2/4MB.
2795 *
2796 * We'll walk the ram range list in parallel and optimize lookups.
2797 * We will only sync one shadow page table at a time.
2798 */
2799 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2800
2801 /**
2802 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2803 */
2804
2805 /*
2806 * Start by syncing the page directory entry.
2807 */
2808 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2809 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2810
2811 /*
2812 * If the page is not flagged as dirty and is writable, then make it read-only
2813 * at PD level, so we can set the dirty bit when the page is modified.
2814 *
2815 * ASSUMES that page access handlers are implemented on page table entry level.
2816 * Thus we will first catch the dirty access and set PDE.D and restart. If
2817 * there is an access handler, we'll trap again and let it work on the problem.
2818 */
2819 /** @todo move the above stuff to a section in the PGM documentation. */
2820 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2821 if ( !PdeSrc.b.u1Dirty
2822 && PdeSrc.b.u1Write)
2823 {
2824 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2825 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2826 PdeDst.b.u1Write = 0;
2827 }
2828 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2829# if defined(IN_RC)
2830 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2831# endif
2832
2833 /*
2834 * Fill the shadow page table.
2835 */
2836 /* Get address and flags from the source PDE. */
2837 SHWPTE PteDstBase;
2838 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
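        /* PteDstBase carries the attribute bits inherited from the big-page PDE; each 4K shadow
           PTE below is the page's HCPhys OR'ed with these bits. */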
2839
2840 /* Loop thru the entries in the shadow PT. */
2841 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2842 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2843 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2844 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2845 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2846 unsigned iPTDst = 0;
2847 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2848 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2849 {
2850 /* Advance ram range list. */
2851 while (pRam && GCPhys > pRam->GCPhysLast)
2852 pRam = pRam->CTX_SUFF(pNext);
2853 if (pRam && GCPhys >= pRam->GCPhys)
2854 {
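                /* GCPhys falls inside this ram range: sync PTEs until we leave the range or the
                   shadow PT is full. */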
2855 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2856 do
2857 {
2858 /* Make shadow PTE. */
2859 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2860 SHWPTE PteDst;
2861
2862# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2863 /* Try to make the page writable if necessary. */
2864 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2865 && ( PGM_PAGE_IS_ZERO(pPage)
2866 || ( PteDstBase.n.u1Write
2867 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2868# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2869 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2870# endif
2871 && !PGM_PAGE_IS_BALLOONED(pPage))
2872 )
2873 )
2874 {
2875 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2876 AssertRCReturn(rc, rc);
2877 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2878 break;
2879 }
2880# endif
2881
2882 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2883 {
2884 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2885 {
2886 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2887 PteDst.n.u1Write = 0;
2888 }
2889 else
2890 PteDst.u = 0;
2891 }
2892 else
2893 if (PGM_PAGE_IS_BALLOONED(pPage))
2894 {
2895 /* Skip ballooned pages. */
2896 PteDst.u = 0;
2897 }
2898# ifndef IN_RING0
2899 /*
2900 * Assuming kernel code will be marked as supervisor and not as user level and executed
2901 * using a conforming code selector. Don't check for readonly, as that implies the whole
2902 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2903 */
2904 else if ( !PdeSrc.n.u1User
2905 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2906 PteDst.u = 0;
2907# endif
2908 else
2909 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2910
2911 /* Only map writable pages writable. */
2912 if ( PteDst.n.u1Write
2913 && PteDst.n.u1Present
2914 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2915 {
2916 /* Still applies to shared pages. */
2917 Assert(!PGM_PAGE_IS_ZERO(pPage));
2918 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2919 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2920 }
2921
2922 if (PteDst.n.u1Present)
2923 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2924
2925 /* commit it */
2926 pPTDst->a[iPTDst] = PteDst;
2927 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2928 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2929 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2930
2931 /* advance */
2932 GCPhys += PAGE_SIZE;
2933 iHCPage++;
2934 iPTDst++;
2935 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2936 && GCPhys <= pRam->GCPhysLast);
2937 }
2938 else if (pRam)
2939 {
2940 Log(("Invalid pages at %RGp\n", GCPhys));
2941 do
2942 {
2943 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2944 GCPhys += PAGE_SIZE;
2945 iPTDst++;
2946 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2947 && GCPhys < pRam->GCPhys);
2948 }
2949 else
2950 {
2951 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2952 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2953 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2954 }
2955 } /* while more PTEs */
2956 } /* 4KB / 4MB */
2957 }
2958 else
2959 AssertRelease(!PdeDst.n.u1Present);
2960
2961 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2962 if (RT_FAILURE(rc))
2963 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2964 return rc;
2965
2966#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2967 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2968 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2969 && !defined(IN_RC)
2970
2971 /*
2972 * Validate input a little bit.
2973 */
2974 int rc = VINF_SUCCESS;
2975# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2976 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2977 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2978
2979 /* Fetch the pgm pool shadow descriptor. */
2980 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2981 Assert(pShwPde);
2982
2983# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2984 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2985 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2986 PX86PDPAE pPDDst;
2987 PSHWPDE pPdeDst;
2988
2989 /* Fetch the pgm pool shadow descriptor. */
2990 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2991 AssertRCSuccessReturn(rc, rc);
2992 Assert(pShwPde);
2993
2994 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2995 pPdeDst = &pPDDst->a[iPDDst];
2996
2997# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2998 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2999 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3000 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3001 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
3002 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3003 AssertRCSuccessReturn(rc, rc);
3004 Assert(pPDDst);
3005 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3006
3007 /* Fetch the pgm pool shadow descriptor. */
3008 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3009 Assert(pShwPde);
3010
3011# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3012 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3013 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3014 PEPTPD pPDDst;
3015 PEPTPDPT pPdptDst;
3016
3017 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3018 if (rc != VINF_SUCCESS)
3019 {
3020 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3021 AssertRC(rc);
3022 return rc;
3023 }
3024 Assert(pPDDst);
3025 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3026
3027 /* Fetch the pgm pool shadow descriptor. */
3028 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3029 Assert(pShwPde);
3030# endif
3031 SHWPDE PdeDst = *pPdeDst;
3032
3033 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3034 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3035
3036# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
3037# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
3038 if (HWACCMIsNestedPagingActive(pVM))
3039# endif
3040 {
3041 PPGMPAGE pPage;
3042
3043 /* Check if we allocated a big page before for this 2 MB range. */
3044 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3045 if (RT_SUCCESS(rc))
3046 {
3047 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3048
3049 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3050 {
3051 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3052 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3053 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3054 }
3055 else
3056 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3057 {
3058 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3059 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3060 if (RT_SUCCESS(rc))
3061 {
3062 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3063 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3064 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3065 }
3066 }
3067 else
3068 if (PGMIsUsingLargePages(pVM))
3069 {
3070 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3071 if (RT_SUCCESS(rc))
3072 {
3073 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3074 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3075 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3076 }
3077 else
3078 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3079 }
3080
3081 if (HCPhys != NIL_RTHCPHYS)
3082 {
3083 PdeDst.u &= X86_PDE_AVL_MASK;
3084 PdeDst.u |= HCPhys;
3085 PdeDst.n.u1Present = 1;
3086 PdeDst.n.u1Write = 1;
3087 PdeDst.b.u1Size = 1;
3088# if PGM_SHW_TYPE == PGM_TYPE_EPT
3089 PdeDst.n.u1Execute = 1;
3090 PdeDst.b.u1IgnorePAT = 1;
3091 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3092# else
3093 PdeDst.n.u1User = 1;
3094# endif
3095 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3096
3097 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3098 /* Add a reference to the first page only. */
3099 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3100
3101 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3102 return VINF_SUCCESS;
3103 }
3104 }
3105 }
3106# endif /* PGM_WITH_LARGE_PAGES && SHW != 32BIT && SHW != PAE */
3107
3108 GSTPDE PdeSrc;
3109 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3110 PdeSrc.n.u1Present = 1;
3111 PdeSrc.n.u1Write = 1;
3112 PdeSrc.n.u1Accessed = 1;
3113 PdeSrc.n.u1User = 1;
3114
3115 /*
3116 * Allocate & map the page table.
3117 */
3118 PSHWPT pPTDst;
3119 PPGMPOOLPAGE pShwPage;
3120 RTGCPHYS GCPhys;
3121
3122 /* Virtual address = physical address */
3123 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
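 /* The pool key is the physical base of the region this shadow PT covers; one shadow
    page table maps RT_BIT_64(SHW_PD_SHIFT) bytes of the identity-mapped address space. */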
3124 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3125
3126 if ( rc == VINF_SUCCESS
3127 || rc == VINF_PGM_CACHED_PAGE)
3128 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3129 else
3130 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3131
3132 PdeDst.u &= X86_PDE_AVL_MASK;
3133 PdeDst.u |= pShwPage->Core.Key;
3134 PdeDst.n.u1Present = 1;
3135 PdeDst.n.u1Write = 1;
3136# if PGM_SHW_TYPE == PGM_TYPE_EPT
3137 PdeDst.n.u1Execute = 1;
3138# else
3139 PdeDst.n.u1User = 1;
3140 PdeDst.n.u1Accessed = 1;
3141# endif
3142 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3143
3144 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3145 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3146 return rc;
3147
3148#else
3149 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3150 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3151 return VERR_INTERNAL_ERROR;
3152#endif
3153}
3154
3155
3156
3157/**
3158 * Prefetch a page/set of pages.
3159 *
3160 * Typically used to sync commonly used pages before entering raw mode
3161 * after a CR3 reload.
3162 *
3163 * @returns VBox status code.
3164 * @param pVCpu The VMCPU handle.
3165 * @param GCPtrPage Page to prefetch.
3166 */
3167PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3168{
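 /* Note: callers typically reach this via the mode-indirected PGMPrefetchPage() wrapper,
    e.g. to warm a page after a CR3 reload; a failure here is not fatal since the page
    will simply be synced lazily on the next page fault. */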
3169#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3170 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3171 /*
3172 * Check that all Guest levels thru the PDE are present, getting the
3173 * PD and PDE in the process.
3174 */
3175 int rc = VINF_SUCCESS;
3176# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3177# if PGM_GST_TYPE == PGM_TYPE_32BIT
3178 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3179 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3180# elif PGM_GST_TYPE == PGM_TYPE_PAE
3181 unsigned iPDSrc;
3182 X86PDPE PdpeSrc;
3183 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3184 if (!pPDSrc)
3185 return VINF_SUCCESS; /* not present */
3186# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3187 unsigned iPDSrc;
3188 PX86PML4E pPml4eSrc;
3189 X86PDPE PdpeSrc;
3190 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3191 if (!pPDSrc)
3192 return VINF_SUCCESS; /* not present */
3193# endif
3194 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3195# else
3196 PGSTPD pPDSrc = NULL;
3197 const unsigned iPDSrc = 0;
3198 GSTPDE PdeSrc;
3199
3200 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3201 PdeSrc.n.u1Present = 1;
3202 PdeSrc.n.u1Write = 1;
3203 PdeSrc.n.u1Accessed = 1;
3204 PdeSrc.n.u1User = 1;
3205# endif
3206
3207 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3208 {
3209 PVM pVM = pVCpu->CTX_SUFF(pVM);
3210 pgmLock(pVM);
3211
3212# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3213 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3214# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3215 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3216 PX86PDPAE pPDDst;
3217 X86PDEPAE PdeDst;
3218# if PGM_GST_TYPE != PGM_TYPE_PAE
3219 X86PDPE PdpeSrc;
3220
3221 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3222 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3223# endif
3224 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3225 if (rc != VINF_SUCCESS)
3226 {
3227 pgmUnlock(pVM);
3228 AssertRC(rc);
3229 return rc;
3230 }
3231 Assert(pPDDst);
3232 PdeDst = pPDDst->a[iPDDst];
3233
3234# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3235 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3236 PX86PDPAE pPDDst;
3237 X86PDEPAE PdeDst;
3238
3239# if PGM_GST_TYPE == PGM_TYPE_PROT
3240 /* AMD-V nested paging */
3241 X86PML4E Pml4eSrc;
3242 X86PDPE PdpeSrc;
3243 PX86PML4E pPml4eSrc = &Pml4eSrc;
3244
3245 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3246 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3247 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3248# endif
3249
3250 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3251 if (rc != VINF_SUCCESS)
3252 {
3253 pgmUnlock(pVM);
3254 AssertRC(rc);
3255 return rc;
3256 }
3257 Assert(pPDDst);
3258 PdeDst = pPDDst->a[iPDDst];
3259# endif
3260 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3261 {
3262 if (!PdeDst.n.u1Present)
3263 {
3264 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3265 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3266 }
3267 else
3268 {
3269 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3270 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3271 * makes no sense to prefetch more than one page.
3272 */
3273 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3274 if (RT_SUCCESS(rc))
3275 rc = VINF_SUCCESS;
3276 }
3277 }
3278 pgmUnlock(pVM);
3279 }
3280 return rc;
3281
3282#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3283 return VINF_SUCCESS; /* ignore */
3284#endif
3285}
3286
3287
3288
3289
3290/**
3291 * Syncs a page during a PGMVerifyAccess() call.
3292 *
3293 * @returns VBox status code (informational status codes included).
3294 * @param pVCpu The VMCPU handle.
3295 * @param GCPtrPage The address of the page to sync.
3296 * @param fPage The effective guest page flags.
3297 * @param uErr The trap error code.
3298 */
3299PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3300{
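 /* Note: typically reached via the PGMVerifyAccess() wrapper, e.g. from CSAM/PATM when
    scanning or patching guest code, rather than directly from the trap handler. */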
3301 PVM pVM = pVCpu->CTX_SUFF(pVM);
3302
3303 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3304
3305 Assert(!HWACCMIsNestedPagingActive(pVM));
3306#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3307 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3308
3309# ifndef IN_RING0
3310 if (!(fPage & X86_PTE_US))
3311 {
3312 /*
3313 * Mark this page as safe.
3314 */
3315 /** @todo not correct for pages that contain both code and data!! */
3316 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3317 CSAMMarkPage(pVM, GCPtrPage, true);
3318 }
3319# endif
3320
3321 /*
3322 * Get guest PD and index.
3323 */
3324# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3325# if PGM_GST_TYPE == PGM_TYPE_32BIT
3326 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3327 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3328# elif PGM_GST_TYPE == PGM_TYPE_PAE
3329 unsigned iPDSrc = 0;
3330 X86PDPE PdpeSrc;
3331 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3332
3333 if (!pPDSrc)
3334 {
3335 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3336 return VINF_EM_RAW_GUEST_TRAP;
3337 }
3338# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3339 unsigned iPDSrc;
3340 PX86PML4E pPml4eSrc;
3341 X86PDPE PdpeSrc;
3342 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3343 if (!pPDSrc)
3344 {
3345 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3346 return VINF_EM_RAW_GUEST_TRAP;
3347 }
3348# endif
3349# else
3350 PGSTPD pPDSrc = NULL;
3351 const unsigned iPDSrc = 0;
3352# endif
3353 int rc = VINF_SUCCESS;
3354
3355 pgmLock(pVM);
3356
3357 /*
3358 * First check if the shadow pd is present.
3359 */
3360# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3361 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3362# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3363 PX86PDEPAE pPdeDst;
3364 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3365 PX86PDPAE pPDDst;
3366# if PGM_GST_TYPE != PGM_TYPE_PAE
3367 X86PDPE PdpeSrc;
3368
3369 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3370 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3371# endif
3372 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3373 if (rc != VINF_SUCCESS)
3374 {
3375 pgmUnlock(pVM);
3376 AssertRC(rc);
3377 return rc;
3378 }
3379 Assert(pPDDst);
3380 pPdeDst = &pPDDst->a[iPDDst];
3381
3382# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3383 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3384 PX86PDPAE pPDDst;
3385 PX86PDEPAE pPdeDst;
3386
3387# if PGM_GST_TYPE == PGM_TYPE_PROT
3388 /* AMD-V nested paging */
3389 X86PML4E Pml4eSrc;
3390 X86PDPE PdpeSrc;
3391 PX86PML4E pPml4eSrc = &Pml4eSrc;
3392
3393 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3394 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3395 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3396# endif
3397
3398 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3399 if (rc != VINF_SUCCESS)
3400 {
3401 pgmUnlock(pVM);
3402 AssertRC(rc);
3403 return rc;
3404 }
3405 Assert(pPDDst);
3406 pPdeDst = &pPDDst->a[iPDDst];
3407# endif
3408
3409# if defined(IN_RC)
3410 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3411 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3412# endif
3413
3414 if (!pPdeDst->n.u1Present)
3415 {
3416 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3417 if (rc != VINF_SUCCESS)
3418 {
3419# if defined(IN_RC)
3420 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3421 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3422# endif
3423 pgmUnlock(pVM);
3424 AssertRC(rc);
3425 return rc;
3426 }
3427 }
3428
3429# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3430 /* Check for dirty bit fault */
3431 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3432 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3433 Log(("PGMVerifyAccess: success (dirty)\n"));
3434 else
3435 {
3436 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3437# else
3438 {
3439 GSTPDE PdeSrc;
3440 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3441 PdeSrc.n.u1Present = 1;
3442 PdeSrc.n.u1Write = 1;
3443 PdeSrc.n.u1Accessed = 1;
3444 PdeSrc.n.u1User = 1;
3445
3446# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3447 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3448 if (uErr & X86_TRAP_PF_US)
3449 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3450 else /* supervisor */
3451 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3452
3453 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3454 if (RT_SUCCESS(rc))
3455 {
3456 /* Page was successfully synced */
3457 Log2(("PGMVerifyAccess: success (sync)\n"));
3458 rc = VINF_SUCCESS;
3459 }
3460 else
3461 {
3462 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3463 rc = VINF_EM_RAW_GUEST_TRAP;
3464 }
3465 }
3466# if defined(IN_RC)
3467 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3468 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3469# endif
3470 pgmUnlock(pVM);
3471 return rc;
3472
3473#else /* unsupported guest+shadow paging combination */
3474
3475 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3476 return VERR_INTERNAL_ERROR;
3477#endif /* unsupported guest+shadow paging combination */
3478}
3479
3480
3481/**
3482 * Syncs the paging hierarchy starting at CR3.
3483 *
3484 * @returns VBox status code, no specials.
3485 * @param pVCpu The VMCPU handle.
3486 * @param cr0 Guest context CR0 register
3487 * @param cr3 Guest context CR3 register
3488 * @param cr4 Guest context CR4 register
3489 * @param fGlobal Including global page directories or not
3490 */
3491PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3492{
3493 PVM pVM = pVCpu->CTX_SUFF(pVM);
3494
3495 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3496
3497#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3498
3499 pgmLock(pVM);
3500
3501# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3502 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3503 if (pPool->cDirtyPages)
3504 pgmPoolResetDirtyPages(pVM);
3505# endif
3506
3507 /*
3508 * Update page access handlers.
3509 * Virtual handlers are always flushed, while physical handlers are only flushed on demand.
3510 * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
3511 * have to look into that later because it will have a bad influence on performance.
3512 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3513 * bird: Yes, but that won't work for aliases.
3514 */
3515 /** @todo this MUST go away. See #1557. */
3516 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3517 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3518 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3519 pgmUnlock(pVM);
3520#endif /* !NESTED && !EPT */
3521
3522#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3523 /*
3524 * Nested / EPT - almost no work.
3525 */
3526 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3527 return VINF_SUCCESS;
3528
3529#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3530 /*
3531 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3532 * out the shadow parts when the guest modifies its tables.
3533 */
3534 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3535 return VINF_SUCCESS;
3536
3537#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3538
3539# ifndef PGM_WITHOUT_MAPPINGS
3540 /*
3541 * Check for and resolve conflicts with our guest mappings if they
3542 * are enabled and not fixed.
3543 */
3544 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3545 {
3546 int rc = pgmMapResolveConflicts(pVM);
3547 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3548 if (rc == VINF_PGM_SYNC_CR3)
3549 {
3550 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3551 return VINF_PGM_SYNC_CR3;
3552 }
3553 }
3554# else
3555 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3556# endif
3557 return VINF_SUCCESS;
3558#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3559}
3560
3561
3562
3563
3564#ifdef VBOX_STRICT
3565#ifdef IN_RC
3566# undef AssertMsgFailed
3567# define AssertMsgFailed Log
3568#endif
3569#ifdef IN_RING3
3570# include <VBox/dbgf.h>
3571
3572/**
3573 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3574 *
3575 * @returns VBox status code (VINF_SUCCESS).
3576 * @param cr3 The root of the hierarchy.
3577 * @param cr4 The cr4 value; only the PAE and PSE flags are currently used.
3578 * @param fLongMode Set if long mode, false if not long mode.
3579 * @param cMaxDepth Number of levels to dump.
3580 * @param pHlp Pointer to the output functions.
3581 */
3582RT_C_DECLS_BEGIN
3583VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3584RT_C_DECLS_END
3585
3586#endif
3587
3588/**
3589 * Checks that the shadow page table is in sync with the guest one.
3590 *
3591 * @returns The number of errors.
3593 * @param pVCpu The VMCPU handle.
3594 * @param cr3 Guest context CR3 register
3595 * @param cr4 Guest context CR4 register
3596 * @param GCPtr Where to start. Defaults to 0.
3597 * @param cb How much to check. Defaults to everything.
3598 */
3599PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3600{
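 /* Note: strict-build consistency check, typically invoked via the PGMAssertCR3() wrapper
    after operations that might leave the shadow tables out of sync with the guest. */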
3601#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3602 return 0;
3603#else
3604 unsigned cErrors = 0;
3605 PVM pVM = pVCpu->CTX_SUFF(pVM);
3606 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3607
3608#if PGM_GST_TYPE == PGM_TYPE_PAE
3609 /** @todo currently broken; crashes below somewhere */
3610 AssertFailed();
3611#endif
3612
3613#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3614 || PGM_GST_TYPE == PGM_TYPE_PAE \
3615 || PGM_GST_TYPE == PGM_TYPE_AMD64
3616
3617# if PGM_GST_TYPE == PGM_TYPE_32BIT
3618 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3619# else
3620 bool fBigPagesSupported = true;
3621# endif
3622 PPGMCPU pPGM = &pVCpu->pgm.s;
3623 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3624 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3625# ifndef IN_RING0
3626 RTHCPHYS HCPhys; /* general usage. */
3627# endif
3628 int rc;
3629
3630 /*
3631 * Check that the Guest CR3 and all its mappings are correct.
3632 */
3633 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3634 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3635 false);
3636# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3637# if PGM_GST_TYPE == PGM_TYPE_32BIT
3638 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3639# else
3640 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3641# endif
3642 AssertRCReturn(rc, 1);
3643 HCPhys = NIL_RTHCPHYS;
3644 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3645 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3646# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3647 pgmGstGet32bitPDPtr(pPGM);
3648 RTGCPHYS GCPhys;
3649 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3650 AssertRCReturn(rc, 1);
3651 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3652# endif
3653# endif /* !IN_RING0 */
3654
3655 /*
3656 * Get and check the Shadow CR3.
3657 */
3658# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3659 unsigned cPDEs = X86_PG_ENTRIES;
3660 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3661# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3662# if PGM_GST_TYPE == PGM_TYPE_32BIT
3663 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3664# else
3665 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3666# endif
3667 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3668# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3669 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3670 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3671# endif
3672 if (cb != ~(RTGCPTR)0)
3673 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3674
3675/** @todo call the other two PGMAssert*() functions. */
3676
3677# if PGM_GST_TYPE == PGM_TYPE_AMD64
3678 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3679
3680 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3681 {
3682 PPGMPOOLPAGE pShwPdpt = NULL;
3683 PX86PML4E pPml4eSrc;
3684 PX86PML4E pPml4eDst;
3685 RTGCPHYS GCPhysPdptSrc;
3686
3687 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3688 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3689
3690 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3691 if (!pPml4eDst->n.u1Present)
3692 {
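 /* Each PML4 entry covers 512 * 512 * 2 MB = 512 GB of address space. */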
3693 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3694 continue;
3695 }
3696
3697 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3698 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3699
3700 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3701 {
3702 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3703 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3704 cErrors++;
3705 continue;
3706 }
3707
3708 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3709 {
3710 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3711 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3712 cErrors++;
3713 continue;
3714 }
3715
3716 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3717 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3718 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3719 {
3720 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3721 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3722 cErrors++;
3723 continue;
3724 }
3725# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3726 {
3727# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3728
3729# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3730 /*
3731 * Check the PDPTEs too.
3732 */
3733 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3734
3735 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3736 {
3737 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3738 PPGMPOOLPAGE pShwPde = NULL;
3739 PX86PDPE pPdpeDst;
3740 RTGCPHYS GCPhysPdeSrc;
3741# if PGM_GST_TYPE == PGM_TYPE_PAE
3742 X86PDPE PdpeSrc;
3743 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3744 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3745# else
3746 PX86PML4E pPml4eSrcIgn;
3747 X86PDPE PdpeSrc;
3748 PX86PDPT pPdptDst;
3749 PX86PDPAE pPDDst;
3750 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3751
3752 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3753 if (rc != VINF_SUCCESS)
3754 {
3755 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3756 GCPtr += 512 * _2M;
3757 continue; /* next PDPTE */
3758 }
3759 Assert(pPDDst);
3760# endif
3761 Assert(iPDSrc == 0);
3762
3763 pPdpeDst = &pPdptDst->a[iPdpt];
3764
3765 if (!pPdpeDst->n.u1Present)
3766 {
3767 GCPtr += 512 * _2M;
3768 continue; /* next PDPTE */
3769 }
3770
3771 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3772 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3773
3774 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3775 {
3776 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3777 GCPtr += 512 * _2M;
3778 cErrors++;
3779 continue;
3780 }
3781
3782 if (GCPhysPdeSrc != pShwPde->GCPhys)
3783 {
3784# if PGM_GST_TYPE == PGM_TYPE_AMD64
3785 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3786# else
3787 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3788# endif
3789 GCPtr += 512 * _2M;
3790 cErrors++;
3791 continue;
3792 }
3793
3794# if PGM_GST_TYPE == PGM_TYPE_AMD64
3795 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3796 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3797 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3798 {
3799 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3800 GCPtr += 512 * _2M;
3801 cErrors++;
3802 continue;
3803 }
3804# endif
3805
3806# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3807 {
3808# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3809# if PGM_GST_TYPE == PGM_TYPE_32BIT
3810 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3811# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3812 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3813# endif
3814# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3815 /*
3816 * Iterate the shadow page directory.
3817 */
3818 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3819 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3820
3821 for (;
3822 iPDDst < cPDEs;
3823 iPDDst++, GCPtr += cIncrement)
3824 {
3825# if PGM_SHW_TYPE == PGM_TYPE_PAE
3826 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3827# else
3828 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3829# endif
3830 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3831 {
3832 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3833 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3834 {
3835 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3836 cErrors++;
3837 continue;
3838 }
3839 }
3840 else if ( (PdeDst.u & X86_PDE_P)
3841 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3842 )
3843 {
3844 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3845 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3846 if (!pPoolPage)
3847 {
3848 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3849 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3850 cErrors++;
3851 continue;
3852 }
3853 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3854
3855 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3856 {
3857 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3858 GCPtr, (uint64_t)PdeDst.u));
3859 cErrors++;
3860 }
3861
3862 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3863 {
3864 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3865 GCPtr, (uint64_t)PdeDst.u));
3866 cErrors++;
3867 }
3868
3869 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3870 if (!PdeSrc.n.u1Present)
3871 {
3872 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3873 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3874 cErrors++;
3875 continue;
3876 }
3877
3878 if ( !PdeSrc.b.u1Size
3879 || !fBigPagesSupported)
3880 {
3881 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3882# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3883 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3884# endif
3885 }
3886 else
3887 {
3888# if PGM_GST_TYPE == PGM_TYPE_32BIT
3889 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3890 {
3891 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3892 GCPtr, (uint64_t)PdeSrc.u));
3893 cErrors++;
3894 continue;
3895 }
3896# endif
3897 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3898# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3899 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3900# endif
3901 }
3902
3903 if ( pPoolPage->enmKind
3904 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3905 {
3906 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3907 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3908 cErrors++;
3909 }
3910
3911 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3912 if (!pPhysPage)
3913 {
3914 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3915 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3916 cErrors++;
3917 continue;
3918 }
3919
3920 if (GCPhysGst != pPoolPage->GCPhys)
3921 {
3922 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3923 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3924 cErrors++;
3925 continue;
3926 }
3927
3928 if ( !PdeSrc.b.u1Size
3929 || !fBigPagesSupported)
3930 {
3931 /*
3932 * Page Table.
3933 */
3934 const GSTPT *pPTSrc;
3935 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3936 if (RT_FAILURE(rc))
3937 {
3938 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3939 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3940 cErrors++;
3941 continue;
3942 }
3943 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3944 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3945 {
3946 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3947 // (This problem will go away when/if we shadow multiple CR3s.)
3948 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3949 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3950 cErrors++;
3951 continue;
3952 }
3953 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3954 {
3955 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3956 GCPtr, (uint64_t)PdeDst.u));
3957 cErrors++;
3958 continue;
3959 }
3960
3961 /* iterate the page table. */
3962# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3963 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3964 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3965# else
3966 const unsigned offPTSrc = 0;
3967# endif
3968 for (unsigned iPT = 0, off = 0;
3969 iPT < RT_ELEMENTS(pPTDst->a);
3970 iPT++, off += PAGE_SIZE)
3971 {
3972 const SHWPTE PteDst = pPTDst->a[iPT];
3973
3974 /* skip not-present entries. */
3975 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3976 continue;
3977 Assert(PteDst.n.u1Present);
3978
3979 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3980 if (!PteSrc.n.u1Present)
3981 {
3982# ifdef IN_RING3
3983 PGMAssertHandlerAndFlagsInSync(pVM);
3984 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3985# endif
3986 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3987 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3988 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3989 cErrors++;
3990 continue;
3991 }
3992
3993 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3994# if 1 /** @todo sync accessed bit properly... */
3995 fIgnoreFlags |= X86_PTE_A;
3996# endif
3997
3998 /* match the physical addresses */
3999 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4000 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4001
4002# ifdef IN_RING3
4003 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4004 if (RT_FAILURE(rc))
4005 {
4006 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4007 {
4008 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4009 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4010 cErrors++;
4011 continue;
4012 }
4013 }
4014 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4015 {
4016 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4017 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4018 cErrors++;
4019 continue;
4020 }
4021# endif
4022
4023 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4024 if (!pPhysPage)
4025 {
4026# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4027 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4028 {
4029 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4030 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4031 cErrors++;
4032 continue;
4033 }
4034# endif
4035 if (PteDst.n.u1Write)
4036 {
4037 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4038 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4039 cErrors++;
4040 }
4041 fIgnoreFlags |= X86_PTE_RW;
4042 }
4043 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4044 {
4045 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4046 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4047 cErrors++;
4048 continue;
4049 }
4050
4051 /* flags */
4052 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4053 {
4054 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4055 {
4056 if (PteDst.n.u1Write)
4057 {
4058 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4059 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4060 cErrors++;
4061 continue;
4062 }
4063 fIgnoreFlags |= X86_PTE_RW;
4064 }
4065 else
4066 {
4067 if (PteDst.n.u1Present)
4068 {
4069 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4070 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4071 cErrors++;
4072 continue;
4073 }
4074 fIgnoreFlags |= X86_PTE_P;
4075 }
4076 }
4077 else
4078 {
4079 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4080 {
4081 if (PteDst.n.u1Write)
4082 {
4083 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4084 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4085 cErrors++;
4086 continue;
4087 }
4088 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4089 {
4090 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4091 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4092 cErrors++;
4093 continue;
4094 }
4095 if (PteDst.n.u1Dirty)
4096 {
4097 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4098 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4099 cErrors++;
4100 }
4101# if 0 /** @todo sync access bit properly... */
4102 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4103 {
4104 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4105 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4106 cErrors++;
4107 }
4108 fIgnoreFlags |= X86_PTE_RW;
4109# else
4110 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4111# endif
4112 }
4113 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4114 {
4115 /* access bit emulation (not implemented). */
4116 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4117 {
4118 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4119 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4120 cErrors++;
4121 continue;
4122 }
4123 if (!PteDst.n.u1Accessed)
4124 {
4125 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4126 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4127 cErrors++;
4128 }
4129 fIgnoreFlags |= X86_PTE_P;
4130 }
4131# ifdef DEBUG_sandervl
4132 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4133# endif
4134 }
4135
4136 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4137 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4138 )
4139 {
4140 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4141 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4142 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4143 cErrors++;
4144 continue;
4145 }
4146 } /* foreach PTE */
4147 }
4148 else
4149 {
4150 /*
4151 * Big Page.
4152 */
4153 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4154 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4155 {
4156 if (PdeDst.n.u1Write)
4157 {
4158 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4159 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4160 cErrors++;
4161 continue;
4162 }
4163 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4164 {
4165 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4166 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4167 cErrors++;
4168 continue;
4169 }
4170# if 0 /** @todo sync access bit properly... */
4171 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4172 {
4173 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4174 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4175 cErrors++;
4176 }
4177 fIgnoreFlags |= X86_PTE_RW;
4178# else
4179 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4180# endif
4181 }
4182 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4183 {
4184 /* access bit emulation (not implemented). */
4185 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4186 {
4187 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4188 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4189 cErrors++;
4190 continue;
4191 }
4192 if (!PdeDst.n.u1Accessed)
4193 {
4194 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4195 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4196 cErrors++;
4197 }
4198 fIgnoreFlags |= X86_PTE_P;
4199 }
4200
4201 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4202 {
4203 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4204 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4205 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4206 cErrors++;
4207 }
4208
4209 /* iterate the page table. */
4210 for (unsigned iPT = 0, off = 0;
4211 iPT < RT_ELEMENTS(pPTDst->a);
4212 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4213 {
4214 const SHWPTE PteDst = pPTDst->a[iPT];
4215
4216 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4217 {
4218 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4219 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4220 cErrors++;
4221 }
4222
4223 /* skip not-present entries. */
4224 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4225 continue;
4226
4227 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4228
4229 /* match the physical addresses */
4230 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4231
4232# ifdef IN_RING3
4233 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4234 if (RT_FAILURE(rc))
4235 {
4236 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4237 {
4238 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4239 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4240 cErrors++;
4241 }
4242 }
4243 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4244 {
4245 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4246 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4247 cErrors++;
4248 continue;
4249 }
4250# endif
4251 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4252 if (!pPhysPage)
4253 {
4254# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4255 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4256 {
4257 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4258 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4259 cErrors++;
4260 continue;
4261 }
4262# endif
4263 if (PteDst.n.u1Write)
4264 {
4265 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4266 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4267 cErrors++;
4268 }
4269 fIgnoreFlags |= X86_PTE_RW;
4270 }
4271 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4272 {
4273 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4274 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4275 cErrors++;
4276 continue;
4277 }
4278
4279 /* flags */
4280 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4281 {
4282 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4283 {
4284 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4285 {
4286 if (PteDst.n.u1Write)
4287 {
4288 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4289 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4290 cErrors++;
4291 continue;
4292 }
4293 fIgnoreFlags |= X86_PTE_RW;
4294 }
4295 }
4296 else
4297 {
4298 if (PteDst.n.u1Present)
4299 {
4300 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4301 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4302 cErrors++;
4303 continue;
4304 }
4305 fIgnoreFlags |= X86_PTE_P;
4306 }
4307 }
4308
4309 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4310 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4311 )
4312 {
4313 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4314 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4315 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4316 cErrors++;
4317 continue;
4318 }
4319 } /* for each PTE */
4320 }
4321 }
4322 /* not present */
4323
4324 } /* for each PDE */
4325
4326 } /* for each PDPTE */
4327
4328 } /* for each PML4E */
4329
4330# ifdef DEBUG
4331 if (cErrors)
4332 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4333# endif
4334
4335#endif /* GST == 32BIT, PAE or AMD64 */
4336 return cErrors;
4337
4338#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4339}
4340#endif /* VBOX_STRICT */
4341
4342
4343/**
4344 * Sets up the CR3 for shadow paging.
4345 *
4346 * @returns Strict VBox status code.
4347 * @retval VINF_SUCCESS.
4348 *
4349 * @param pVCpu The VMCPU handle.
4350 * @param GCPhysCR3 The physical address in the CR3 register.
4351 */
4352PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4353{
4354 PVM pVM = pVCpu->CTX_SUFF(pVM);
4355
4356 /* Update guest paging info. */
4357#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4358 || PGM_GST_TYPE == PGM_TYPE_PAE \
4359 || PGM_GST_TYPE == PGM_TYPE_AMD64
4360
4361 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4362
4363 /*
4364 * Map the page CR3 points at.
4365 */
4366 RTHCPTR HCPtrGuestCR3;
4367 RTHCPHYS HCPhysGuestCR3;
4368 pgmLock(pVM);
4369 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4370 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4371 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4372 /** @todo this needs some reworking wrt. locking. */
4373# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4374 HCPtrGuestCR3 = NIL_RTHCPTR;
4375 int rc = VINF_SUCCESS;
4376# else
4377 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4378# endif
4379 pgmUnlock(pVM);
4380 if (RT_SUCCESS(rc))
4381 {
4382 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4383 if (RT_SUCCESS(rc))
4384 {
4385# ifdef IN_RC
4386 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4387# endif
4388# if PGM_GST_TYPE == PGM_TYPE_32BIT
4389 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4390# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4391 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4392# endif
4393 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4394
4395# elif PGM_GST_TYPE == PGM_TYPE_PAE
4396 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4397 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4398# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4399 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4400# endif
4401 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4402 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4403
4404 /*
4405 * Map the 4 PDs too.
4406 */
4407 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4408 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
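 /* The guest PDPT mapping page is followed by one hypervisor mapping page per guest PD. */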
4409 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4410 {
4411 if (pGuestPDPT->a[i].n.u1Present)
4412 {
4413 RTHCPTR HCPtr;
4414 RTHCPHYS HCPhys;
4415 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4416 pgmLock(pVM);
4417 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4418 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4419 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4420# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4421 HCPtr = NIL_RTHCPTR;
4422 int rc2 = VINF_SUCCESS;
4423# else
4424 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4425# endif
4426 pgmUnlock(pVM);
4427 if (RT_SUCCESS(rc2))
4428 {
4429 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4430 AssertRCReturn(rc, rc);
4431
4432 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4433# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4434 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4435# endif
4436 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4437 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4438# ifdef IN_RC
4439 PGM_INVL_PG(pVCpu, GCPtr);
4440# endif
4441 continue;
4442 }
4443 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4444 }
4445
4446 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4447# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4448 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4449# endif
4450 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4451 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4452# ifdef IN_RC
4453 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4454# endif
4455 }
4456
4457# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4458 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4459# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4460 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4461# endif
4462# endif
4463 }
4464 else
4465 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4466 }
4467 else
4468 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4469
4470#else /* prot/real stub */
4471 int rc = VINF_SUCCESS;
4472#endif
4473
4474 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4475# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4476 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4477 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4478 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4479 && PGM_GST_TYPE != PGM_TYPE_PROT))
4480
4481 Assert(!HWACCMIsNestedPagingActive(pVM));
4482
4483 /*
4484 * Update the shadow root page as well since that's not fixed.
4485 */
4486 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4487 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4488 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4489 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4490 PPGMPOOLPAGE pNewShwPageCR3;
4491
4492 pgmLock(pVM);
4493
4494# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4495 if (pPool->cDirtyPages)
4496 pgmPoolResetDirtyPages(pVM);
4497# endif
4498
4499 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
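 /* Allocate (or look up in the pool cache) the new shadow CR3 root and keep it locked
    so the pool cannot reclaim it while it is the active root. */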
4500 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4501 AssertFatalRC(rc);
4502 rc = VINF_SUCCESS;
4503
4504# ifdef IN_RC
4505 /*
4506 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4507 * state will be inconsistent! Flush important things now while
4508 * we still can and then make sure there are no ring-3 calls.
4509 */
4510 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4511 VMMRZCallRing3Disable(pVCpu);
4512# endif
4513
4514 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4515 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4516 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4517# ifdef IN_RING0
4518 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4519 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4520# elif defined(IN_RC)
4521 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4522 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4523# else
4524 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4525 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4526# endif
4527
4528# ifndef PGM_WITHOUT_MAPPINGS
4529 /*
4530 * Apply all hypervisor mappings to the new CR3.
4531 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4532 * make sure we check for conflicts in the new CR3 root.
4533 */
4534# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4535 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4536# endif
4537 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4538 AssertRCReturn(rc, rc);
4539# endif
4540
4541 /* Set the current hypervisor CR3. */
4542 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4543 SELMShadowCR3Changed(pVM, pVCpu);
4544
4545# ifdef IN_RC
4546 /* NOTE: The state is consistent again. */
4547 VMMRZCallRing3Enable(pVCpu);
4548# endif
4549
4550 /* Clean up the old CR3 root. */
4551 if ( pOldShwPageCR3
4552 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4553 {
4554 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4555# ifndef PGM_WITHOUT_MAPPINGS
4556 /* Remove the hypervisor mappings from the shadow page table. */
4557 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4558# endif
4559 /* Mark the page as unlocked; allow flushing again. */
4560 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4561
4562 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4563 }
4564 pgmUnlock(pVM);
4565# endif
4566
4567 return rc;
4568}
4569
4570/**
4571 * Unmaps the shadow CR3.
4572 *
4573 * @returns VBox status, no specials.
4574 * @param pVCpu The VMCPU handle.
4575 */
4576PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4577{
4578 LogFlow(("UnmapCR3\n"));
4579
4580 int rc = VINF_SUCCESS;
4581 PVM pVM = pVCpu->CTX_SUFF(pVM);
4582
4583 /*
4584 * Update guest paging info.
4585 */
4586#if PGM_GST_TYPE == PGM_TYPE_32BIT
4587 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4588# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4589 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4590# endif
4591 pVCpu->pgm.s.pGst32BitPdRC = 0;
4592
4593#elif PGM_GST_TYPE == PGM_TYPE_PAE
4594 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4595# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4596 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4597# endif
4598 pVCpu->pgm.s.pGstPaePdptRC = 0;
4599 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4600 {
4601 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4602# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4603 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4604# endif
4605 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4606 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4607 }
4608
4609#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4610 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4611# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4612 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4613# endif
4614
4615#else /* prot/real mode stub */
4616 /* nothing to do */
4617#endif
4618
4619#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4620 /*
4621 * Update shadow paging info.
4622 */
4623# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4624 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4625 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4626
4627# if PGM_GST_TYPE != PGM_TYPE_REAL
4628 Assert(!HWACCMIsNestedPagingActive(pVM));
4629# endif
4630
4631 pgmLock(pVM);
4632
4633# ifndef PGM_WITHOUT_MAPPINGS
4634 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4635 /* Remove the hypervisor mappings from the shadow page table. */
4636 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4637# endif
4638
4639 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4640 {
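 /* Unlock and free the current shadow CR3 root so the pool can reclaim it. */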
4641 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4642
4643 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4644
4645# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4646 if (pPool->cDirtyPages)
4647 pgmPoolResetDirtyPages(pVM);
4648# endif
4649
4650 /* Mark the page as unlocked; allow flushing again. */
4651 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4652
4653 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4654 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4655 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4656 pVCpu->pgm.s.pShwPageCR3RC = 0;
4657 pVCpu->pgm.s.iShwUser = 0;
4658 pVCpu->pgm.s.iShwUserTable = 0;
4659 }
4660 pgmUnlock(pVM);
4661# endif
4662#endif /* !IN_RC*/
4663
4664 return rc;
4665}