VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 28458

Last change on this file since 28458 was 28458, checked in by vboxsync, 15 years ago

Cleared entire page table entry

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 199.5 KB
Line 
1/* $Id: PGMAllBth.h 28458 2010-04-19 12:36:10Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
/*
 * Forward declarations for the functions of this shadow+guest paging template.
 *
 * PGM_BTH_DECL / PGM_BTH_NAME are defined outside this file; presumably they
 * decorate each name for the current PGM_GST_TYPE / PGM_SHW_TYPE combination -
 * TODO confirm against the header that includes this template.
 *
 * Points that are visible from this file:
 *  - Trap0eHandler is prototyped unconditionally, but its definition further
 *    down is only compiled when IN_RING3 is not defined.
 *  - AssertCR3 is only declared in VBOX_STRICT builds.  Its prototype uses
 *    default arguments (GCPtr = 0, cb = ~(RTGCPTR)0), which is C++ syntax.
 *  - SyncPageWorkerTrackDeref is the only entry declared with DECLINLINE and
 *    PGM_BTH_NAME instead of PGM_BTH_DECL.
 *  - Trap0eHandler relies on these return codes from its helpers:
 *      CheckPageFault      -> VINF_EM_RAW_GUEST_TRAP means a genuine guest fault.
 *      CheckDirtyPageFault -> VINF_PGM_HANDLED_DIRTY_BIT_FAULT means the fault
 *                             was fixed up and the handler returns VINF_SUCCESS.
 *      SyncPT              -> any failure makes the handler request a CR3 sync
 *                             (VINF_PGM_SYNC_CR3).
 */
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
34PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
35PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
37#ifdef VBOX_STRICT
38PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
39#endif
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
42PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
43RT_C_DECLS_END
44
45
46/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
47#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
48# error "Invalid combination; PAE guest implies PAE shadow"
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
53# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
57 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
58# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
59#endif
60
61#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
62 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
63# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
64#endif
65
66
67#ifndef IN_RING3
68/**
69 * #PF Handler for raw-mode guest execution.
70 *
71 * @returns VBox status code (appropriate for trap handling and GC return).
72 *
73 * @param pVCpu VMCPU Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 * @param pfLockTaken PGM lock taken here or not (out)
78 */
79PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
80{
81 PVM pVM = pVCpu->CTX_SUFF(pVM);
82
83 *pfLockTaken = false;
84
85# if defined(IN_RC) && defined(VBOX_STRICT)
86 PGMDynCheckLocks(pVM);
87# endif
88
89# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
90 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
91 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
92
93# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
94 /*
95 * Hide the instruction fetch trap indicator for now.
96 */
97 /** @todo NXE will change this and we must fix NXE in the switcher too! */
98 if (uErr & X86_TRAP_PF_ID)
99 {
100 uErr &= ~X86_TRAP_PF_ID;
101 TRPMSetErrorCode(pVCpu, uErr);
102 }
103# endif
104
105 /*
106 * Get PDs.
107 */
108 int rc;
109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
110# if PGM_GST_TYPE == PGM_TYPE_32BIT
111 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
113
114# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
115
116# if PGM_GST_TYPE == PGM_TYPE_PAE
117 unsigned iPDSrc = 0; /* initialized to shut up gcc */
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120
121# elif PGM_GST_TYPE == PGM_TYPE_AMD64
122 unsigned iPDSrc = 0; /* initialized to shut up gcc */
123 PX86PML4E pPml4eSrc;
124 X86PDPE PdpeSrc;
125 PGSTPD pPDSrc;
126
127 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
128 Assert(pPml4eSrc);
129# endif
130
131 /* Quick check for a valid guest trap. (PAE & AMD64) */
132 if (!pPDSrc)
133 {
134# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
135 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
136# else
137 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# endif
139 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
140 TRPMSetErrorCode(pVCpu, uErr);
141 return VINF_EM_RAW_GUEST_TRAP;
142 }
143# endif
144
145# else /* !PGM_WITH_PAGING */
146 PGSTPD pPDSrc = NULL;
147 const unsigned iPDSrc = 0;
148# endif /* !PGM_WITH_PAGING */
149
150# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
151 /*
152 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
153 * where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.
154 */
155 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
156 && MMHyperIsInsideArea(pVM, pvFault))
157 {
158 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
159 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
160 return VINF_EM_RAW_EMULATE_INSTR;
161 }
162# endif
163
164 /* First check for a genuine guest page fault. */
165# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
166 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
167 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
168 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
169 if (rc == VINF_EM_RAW_GUEST_TRAP)
170 {
171 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
172 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
173 return rc;
174 }
175# endif /* PGM_WITH_PAGING */
176
177 /* Take the big lock now. */
178 *pfLockTaken = true;
179 pgmLock(pVM);
180
181 /* Fetch the guest PDE */
182# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
183 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
184# else
185 GSTPDE PdeSrc;
186 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
187 PdeSrc.n.u1Present = 1;
188 PdeSrc.n.u1Write = 1;
189 PdeSrc.n.u1Accessed = 1;
190 PdeSrc.n.u1User = 1;
191# endif
192
193# if PGM_SHW_TYPE == PGM_TYPE_32BIT
194 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
195 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
196
197# elif PGM_SHW_TYPE == PGM_TYPE_PAE
198 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
199
200 PX86PDPAE pPDDst;
201# if PGM_GST_TYPE != PGM_TYPE_PAE
202 X86PDPE PdpeSrc;
203
204 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
205 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
206# endif
207 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
208 if (rc != VINF_SUCCESS)
209 {
210 AssertRC(rc);
211 return rc;
212 }
213 Assert(pPDDst);
214
215# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
216 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
217 PX86PDPAE pPDDst;
218# if PGM_GST_TYPE == PGM_TYPE_PROT
219 /* AMD-V nested paging */
220 X86PML4E Pml4eSrc;
221 X86PDPE PdpeSrc;
222 PX86PML4E pPml4eSrc = &Pml4eSrc;
223
224 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
225 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
226 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
227# endif
228
229 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
230 if (rc != VINF_SUCCESS)
231 {
232 AssertRC(rc);
233 return rc;
234 }
235 Assert(pPDDst);
236
237# elif PGM_SHW_TYPE == PGM_TYPE_EPT
238 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
239 PEPTPD pPDDst;
240
241 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
242 if (rc != VINF_SUCCESS)
243 {
244 AssertRC(rc);
245 return rc;
246 }
247 Assert(pPDDst);
248# endif
249
250# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
251 /* Dirty page handling. */
252 if (uErr & X86_TRAP_PF_RW) /* write fault? */
253 {
254 /*
255 * If we successfully correct the write protection fault due to dirty bit
256 * tracking, then return immediately.
257 */
258 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
259 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
260 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
261 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
262 {
263 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
264 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
265 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
266 return VINF_SUCCESS;
267 }
268 }
269
270# if 0 /* rarely useful; leave for debugging. */
271 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
272# endif
273# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
274
275 /*
276 * A common case is the not-present error caused by lazy page table syncing.
277 *
278 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
279 * so we can safely assume that the shadow PT is present when calling SyncPage later.
280 *
281 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
282 * of mapping conflict and defer to SyncCR3 in R3.
283 * (Again, we do NOT support access handlers for non-present guest pages.)
284 *
285 */
286 Assert(PdeSrc.n.u1Present);
287 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
288 && !pPDDst->a[iPDDst].n.u1Present
289 )
290 {
291 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
292 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
293 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
294 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
295 if (RT_SUCCESS(rc))
296 {
297 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
298 return rc;
299 }
300 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
301 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
302 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
303 return VINF_PGM_SYNC_CR3;
304 }
305
306# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
307 /*
308 * Check if this address is within any of our mappings.
309 *
310 * This is *very* fast and it's gonna save us a bit of effort below and prevent
311 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
312 * (BTW, it's impossible to have physical access handlers in a mapping.)
313 */
314 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
315 {
316 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
317 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
318 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
319 {
320 if (pvFault < pMapping->GCPtr)
321 break;
322 if (pvFault - pMapping->GCPtr < pMapping->cb)
323 {
324 /*
325 * The first thing we check is if we've got an undetected conflict.
326 */
327 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
328 {
329 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
330 while (iPT-- > 0)
331 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
332 {
333 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
334 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
335 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync,right? */
336 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
337 return VINF_PGM_SYNC_CR3;
338 }
339 }
340
341 /*
342 * Check if the fault address is in a virtual page access handler range.
343 */
344 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
345 if ( pCur
346 && pvFault - pCur->Core.Key < pCur->cb
347 && uErr & X86_TRAP_PF_RW)
348 {
349# ifdef IN_RC
350 STAM_PROFILE_START(&pCur->Stat, h);
351 pgmUnlock(pVM);
352 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
353 pgmLock(pVM);
354 STAM_PROFILE_STOP(&pCur->Stat, h);
355# else
356 AssertFailed();
357 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
358# endif
359 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
360 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
361 return rc;
362 }
363
364 /*
365 * Pretend we're not here and let the guest handle the trap.
366 */
367 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
368 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
369 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
370 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
371 return VINF_EM_RAW_GUEST_TRAP;
372 }
373 }
374 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
375 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
376# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
377
378 /*
379 * Check if this fault address is flagged for special treatment,
380 * which means we'll have to figure out the physical address and
381 * check flags associated with it.
382 *
383 * ASSUME that we can limit any special access handling to pages
384 * in page tables which the guest believes to be present.
385 */
386 Assert(PdeSrc.n.u1Present);
387 {
388 RTGCPHYS GCPhys = NIL_RTGCPHYS;
389
390# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
391 if ( PdeSrc.b.u1Size
392# if PGM_GST_TYPE == PGM_TYPE_32BIT
393 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
394# endif
395 )
396 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
397 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
398 else
399 {
400 PGSTPT pPTSrc;
401 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
402 if (RT_SUCCESS(rc))
403 {
404 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
405 if (pPTSrc->a[iPTESrc].n.u1Present)
406 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
407 }
408 }
409# else
410 /* No paging so the fault address is the physical address */
411 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
412# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
413
414 /*
415 * If we have a GC address we'll check if it has any flags set.
416 */
417 if (GCPhys != NIL_RTGCPHYS)
418 {
419 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
420
421 PPGMPAGE pPage;
422 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
423 if (RT_SUCCESS(rc)) /** just handle the failure immediate (it returns) and make things easier to read. */
424 {
425 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
426 {
427 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
428 {
429 /*
430 * Physical page access handler.
431 */
432 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
433 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
434 if (pCur)
435 {
436# ifdef PGM_SYNC_N_PAGES
437 /*
438 * If the region is write protected and we got a page not present fault, then sync
439 * the pages. If the fault was caused by a read, then restart the instruction.
440 * In case of write access continue to the GC write handler.
441 *
442 * ASSUMES that there is only one handler per page or that they have similar write properties.
443 */
444 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
445 && !(uErr & X86_TRAP_PF_P))
446 {
447 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
448 if ( RT_FAILURE(rc)
449 || !(uErr & X86_TRAP_PF_RW)
450 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
451 {
452 AssertRC(rc);
453 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
454 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
455 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
456 return rc;
457 }
458 }
459# endif
460
461 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
462 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
463 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
464
465# if defined(IN_RC) || defined(IN_RING0)
466 if (pCur->CTX_SUFF(pfnHandler))
467 {
468 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
469# ifdef IN_RING0
470 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
471# else
472 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
473# endif
474 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
475 void *pvUser = pCur->CTX_SUFF(pvUser);
476
477 STAM_PROFILE_START(&pCur->Stat, h);
478 if (fLeaveLock)
479 pgmUnlock(pVM); /* @todo: Not entirely safe. */
480
481 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
482 if (fLeaveLock)
483 pgmLock(pVM);
484# ifdef VBOX_WITH_STATISTICS
485 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
486 if (pCur)
487 STAM_PROFILE_STOP(&pCur->Stat, h);
488# else
489 pCur = NULL; /* might be invalid by now. */
490# endif
491
492 }
493 else
494# endif
495 rc = VINF_EM_RAW_EMULATE_INSTR;
496
497 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
498 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
499 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
500 return rc;
501 }
502 }
503# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
504 else
505 {
506# ifdef PGM_SYNC_N_PAGES
507 /*
508 * If the region is write protected and we got a page not present fault, then sync
509 * the pages. If the fault was caused by a read, then restart the instruction.
510 * In case of write access continue to the GC write handler.
511 */
512 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
513 && !(uErr & X86_TRAP_PF_P))
514 {
515 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
516 if ( RT_FAILURE(rc)
517 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
518 || !(uErr & X86_TRAP_PF_RW))
519 {
520 AssertRC(rc);
521 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
522 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
523 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
524 return rc;
525 }
526 }
527# endif
528 /*
529 * Ok, it's a virtual page access handler.
530 *
531 * Since it's faster to search by address, we'll do that first
532 * and then retry by GCPhys if that fails.
533 */
534 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
535 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
536 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
537 */
538 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
539 if (pCur)
540 {
541 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
542 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
543 || !(uErr & X86_TRAP_PF_P)
544 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
545 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
546
547 if ( pvFault - pCur->Core.Key < pCur->cb
548 && ( uErr & X86_TRAP_PF_RW
549 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
550 {
551# ifdef IN_RC
552 STAM_PROFILE_START(&pCur->Stat, h);
553 pgmUnlock(pVM);
554 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
555 pgmLock(pVM);
556 STAM_PROFILE_STOP(&pCur->Stat, h);
557# else
558 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
559# endif
560 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
561 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
562 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
563 return rc;
564 }
565 /* Unhandled part of a monitored page */
566 }
567 else
568 {
569 /* Check by physical address. */
570 unsigned iPage;
571 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
572 &pCur, &iPage);
573 Assert(RT_SUCCESS(rc) || !pCur);
574 if ( pCur
575 && ( uErr & X86_TRAP_PF_RW
576 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
577 {
578 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
579# ifdef IN_RC
580 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
581 Assert(off < pCur->cb);
582 STAM_PROFILE_START(&pCur->Stat, h);
583 pgmUnlock(pVM);
584 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
585 pgmLock(pVM);
586 STAM_PROFILE_STOP(&pCur->Stat, h);
587# else
588 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
589# endif
590 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
591 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
592 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
593 return rc;
594 }
595 }
596 }
597# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
598
599 /*
600 * There is a handled area of the page, but this fault doesn't belong to it.
601 * We must emulate the instruction.
602 *
603 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
604 * we first check if this was a page-not-present fault for a page with only
605 * write access handlers. Restart the instruction if it wasn't a write access.
606 */
607 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
608
609 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
610 && !(uErr & X86_TRAP_PF_P))
611 {
612 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
613 if ( RT_FAILURE(rc)
614 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
615 || !(uErr & X86_TRAP_PF_RW))
616 {
617 AssertRC(rc);
618 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
619 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
620 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
621 return rc;
622 }
623 }
624
625 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
626 * It's writing to an unhandled part of the LDT page several million times.
627 */
628 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
629 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
630 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
631 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
632 return rc;
633 } /* if any kind of handler */
634
635# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
636 if (uErr & X86_TRAP_PF_P)
637 {
638 /*
639 * The page isn't marked, but it might still be monitored by a virtual page access handler.
640 * (ASSUMES no temporary disabling of virtual handlers.)
641 */
642 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
643 * we should correct both the shadow page table and physical memory flags, and not only check for
644 * accesses within the handler region but for access to pages with virtual handlers. */
645 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
646 if (pCur)
647 {
648 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
649 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
650 || !(uErr & X86_TRAP_PF_P)
651 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
652 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
653
654 if ( pvFault - pCur->Core.Key < pCur->cb
655 && ( uErr & X86_TRAP_PF_RW
656 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
657 {
658# ifdef IN_RC
659 STAM_PROFILE_START(&pCur->Stat, h);
660 pgmUnlock(pVM);
661 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
662 pgmLock(pVM);
663 STAM_PROFILE_STOP(&pCur->Stat, h);
664# else
665 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
666# endif
667 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
668 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
669 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
670 return rc;
671 }
672 }
673 }
674# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
675 }
676 else
677 {
678 /*
679 * When the guest accesses invalid physical memory (e.g. probing
680 * of RAM or accessing a remapped MMIO range), then we'll fall
681 * back to the recompiler to emulate the instruction.
682 */
683 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
684 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
685 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
686 return VINF_EM_RAW_EMULATE_INSTR;
687 }
688
689 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
690
691# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
692 /*
693 * We are here only if page is present in Guest page tables and
694 * trap is not handled by our handlers.
695 *
696 * Check it for page out-of-sync situation.
697 */
698 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
699
700 if (!(uErr & X86_TRAP_PF_P))
701 {
702 /*
703 * Page is not present in our page tables.
704 * Try to sync it!
705 * BTW, fPageShw is invalid in this branch!
706 */
707 if (uErr & X86_TRAP_PF_US)
708 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
709 else /* supervisor */
710 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
711
712 if (PGM_PAGE_IS_BALLOONED(pPage))
713 {
714 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
715 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
716 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
717 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
718 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
719 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
720 return rc;
721 }
722# if defined(LOG_ENABLED) && !defined(IN_RING0)
723 RTGCPHYS GCPhys2;
724 uint64_t fPageGst2;
725 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
726 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
727 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
728# endif /* LOG_ENABLED */
729
730# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
731 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
732 {
733 uint64_t fPageGst;
734 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
735 if ( RT_SUCCESS(rc)
736 && !(fPageGst & X86_PTE_US))
737 {
738 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
739 if ( pvFault == (RTGCPTR)pRegFrame->eip
740 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
741# ifdef CSAM_DETECT_NEW_CODE_PAGES
742 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
743 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
744# endif /* CSAM_DETECT_NEW_CODE_PAGES */
745 )
746 {
747 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
748 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
749 if (rc != VINF_SUCCESS)
750 {
751 /*
752 * CSAM needs to perform a job in ring 3.
753 *
754 * Sync the page before going to the host context; otherwise we'll end up in a loop if
755 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
756 */
757 LogFlow(("CSAM ring 3 job\n"));
758 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
759 AssertRC(rc2);
760
761 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
762 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
763 return rc;
764 }
765 }
766# ifdef CSAM_DETECT_NEW_CODE_PAGES
767 else if ( uErr == X86_TRAP_PF_RW
768 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
769 && pRegFrame->ecx < 0x10000)
770 {
771 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
772 * to detect loading of new code pages.
773 */
774
775 /*
776 * Decode the instruction.
777 */
778 RTGCPTR PC;
779 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
780 if (rc == VINF_SUCCESS)
781 {
782 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
783 uint32_t cbOp;
784 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
785
786 /* For now we'll restrict this to rep movsw/d instructions */
787 if ( rc == VINF_SUCCESS
788 && pDis->pCurInstr->opcode == OP_MOVSWD
789 && (pDis->prefix & PREFIX_REP))
790 {
791 CSAMMarkPossibleCodePage(pVM, pvFault);
792 }
793 }
794 }
795# endif /* CSAM_DETECT_NEW_CODE_PAGES */
796
797 /*
798 * Mark this page as safe.
799 */
800 /** @todo not correct for pages that contain both code and data!! */
801 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
802 CSAMMarkPage(pVM, pvFault, true);
803 }
804 }
805# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
806 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
807 if (RT_SUCCESS(rc))
808 {
809 /* The page was successfully synced, return to the guest. */
810 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
811 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
812 return VINF_SUCCESS;
813 }
814 }
815 else /* uErr & X86_TRAP_PF_P: */
816 {
817 /*
818 * Write protected pages are made writable when the guest makes the first
819 * write to it. This happens for pages that are shared, write monitored
820 * and not yet allocated.
821 *
822 * Also, a side effect of not flushing global PDEs are out of sync pages due
823 * to physical monitored regions, that are no longer valid.
824 * Assume for now it only applies to the read/write flag.
825 */
826 if ( RT_SUCCESS(rc)
827 && (uErr & X86_TRAP_PF_RW))
828 {
829 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
830 {
831 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
832 Assert(!PGM_PAGE_IS_ZERO(pPage));
833 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
834
835 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
836 if (rc != VINF_SUCCESS)
837 {
838 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
839 return rc;
840 }
841 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
842 return VINF_EM_NO_MEMORY;
843 }
844
845# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
846 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
847 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
848 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
849 {
850 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
851 uint64_t fPageGst;
852 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
853 if ( RT_SUCCESS(rc)
854 && !(fPageGst & X86_PTE_RW))
855 {
856 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
857 if (RT_SUCCESS(rc))
858 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
859 else
860 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
861 return rc;
862 }
863 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
864 }
865# endif
866 /// @todo count the above case; else
867 if (uErr & X86_TRAP_PF_US)
868 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
869 else /* supervisor */
870 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
871
872 /*
873 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
874 * page is not present, which is not true in this case.
875 */
876 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
877 if (RT_SUCCESS(rc))
878 {
879 /*
880 * Page was successfully synced, return to guest.
881 * First invalidate the page as it might be in the TLB.
882 */
883# if PGM_SHW_TYPE == PGM_TYPE_EPT
884 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
885# else
886 PGM_INVL_PG(pVCpu, pvFault);
887# endif
888# ifdef VBOX_STRICT
889 RTGCPHYS GCPhys2;
890 uint64_t fPageGst;
891 if (!HWACCMIsNestedPagingActive(pVM))
892 {
893 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
894 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n"));
895 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
896 }
897 uint64_t fPageShw;
898 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
899 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
900# endif /* VBOX_STRICT */
901 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
902 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
903 return VINF_SUCCESS;
904 }
905 }
906
907# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
908# ifdef VBOX_STRICT
909 /*
910 * Check for VMM page flags vs. Guest page flags consistency.
911 * Currently only for debug purposes.
912 */
913 if (RT_SUCCESS(rc))
914 {
915 /* Get guest page flags. */
916 uint64_t fPageGst;
917 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
918 if (RT_SUCCESS(rc))
919 {
920 uint64_t fPageShw;
921 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
922
923 /*
924 * Compare page flags.
925 * Note: we have AVL, A, D bits desynched.
926 */
927 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
928 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
929 }
930 else
931 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
932 }
933 else
934 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
935# endif /* VBOX_STRICT */
936# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
937 }
938 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
939# endif /* PGM_OUT_OF_SYNC_IN_GC */
940 }
941 else /* GCPhys == NIL_RTGCPHYS */
942 {
943 /*
944 * Page not present in Guest OS or invalid page table address.
945 * This is potential virtual page access handler food.
946 *
947 * For the present we'll say that our access handlers don't
948 * work for this case - we've already discarded the page table
949 * not present case which is identical to this.
950 *
951 * When we perchance find we need this, we will probably have AVL
952 * trees (offset based) to operate on and we can measure their speed
953 * against mapping a page table and probably rearrange this handling
954 * a bit. (Like, searching virtual ranges before checking the
955 * physical address.)
956 */
957 }
958 }
959
960# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
961 /*
962 * Conclusion, this is a guest trap.
963 */
964 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
965 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
966 return VINF_EM_RAW_GUEST_TRAP;
967# else
968 /* present, but not a monitored page; perhaps the guest is probing physical memory */
969 return VINF_EM_RAW_EMULATE_INSTR;
970# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
971
972
973# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
974
975 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
976 return VERR_INTERNAL_ERROR;
977# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
978}
979#endif /* !IN_RING3 */
980
981
982/**
983 * Emulation of the invlpg instruction.
984 *
985 *
986 * @returns VBox status code.
987 *
988 * @param pVCpu The VMCPU handle.
989 * @param GCPtrPage Page to invalidate.
990 *
991 * @remark ASSUMES that the guest is updating before invalidating. This order
992 * isn't required by the CPU, so this is speculative and could cause
993 * trouble.
994 * @remark No TLB shootdown is done on any other VCPU as we assume that
995 * invlpg emulation is the *only* reason for calling this function.
996 * (The guest has to shoot down TLB entries on other CPUs itself)
997 * Currently true, but keep in mind!
998 *
999 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1000 */
1001PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1002{
1003#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1004 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1005 && PGM_SHW_TYPE != PGM_TYPE_EPT
1006 int rc;
1007 PVM pVM = pVCpu->CTX_SUFF(pVM);
1008 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1009
1010 Assert(PGMIsLockOwner(pVM));
1011
1012 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1013
1014# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1015 if (pPool->cDirtyPages)
1016 pgmPoolResetDirtyPages(pVM);
1017# endif
1018
1019 /*
1020 * Get the shadow PD entry and skip out if this PD isn't present.
1021 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1022 */
1023# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1024 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1025 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1026
1027 /* Fetch the pgm pool shadow descriptor. */
1028 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1029 Assert(pShwPde);
1030
1031# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1032 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1033 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1034
1035 /* If the shadow PDPE isn't present, then skip the invalidate. */
1036 if (!pPdptDst->a[iPdpt].n.u1Present)
1037 {
1038 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1039 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1040 return VINF_SUCCESS;
1041 }
1042
1043 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1044 PPGMPOOLPAGE pShwPde = NULL;
1045 PX86PDPAE pPDDst;
1046
1047 /* Fetch the pgm pool shadow descriptor. */
1048 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1049 AssertRCSuccessReturn(rc, rc);
1050 Assert(pShwPde);
1051
1052 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1053 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1054
1055# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1056 /* PML4 */
1057 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1058 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1059 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1060 PX86PDPAE pPDDst;
1061 PX86PDPT pPdptDst;
1062 PX86PML4E pPml4eDst;
1063 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1064 if (rc != VINF_SUCCESS)
1065 {
1066 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1067 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1068 return VINF_SUCCESS;
1069 }
1070 Assert(pPDDst);
1071
1072 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1073 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1074
1075 if (!pPdpeDst->n.u1Present)
1076 {
1077 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1078 return VINF_SUCCESS;
1079 }
1080
1081 /* Fetch the pgm pool shadow descriptor. */
1082 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1083 Assert(pShwPde);
1084
1085# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1086
1087 const SHWPDE PdeDst = *pPdeDst;
1088 if (!PdeDst.n.u1Present)
1089 {
1090 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1091 return VINF_SUCCESS;
1092 }
1093
1094# if defined(IN_RC)
1095 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1096 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1097# endif
1098
1099 /*
1100 * Get the guest PD entry and calc big page.
1101 */
1102# if PGM_GST_TYPE == PGM_TYPE_32BIT
1103 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1104 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1105 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1106# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1107 unsigned iPDSrc = 0;
1108# if PGM_GST_TYPE == PGM_TYPE_PAE
1109 X86PDPE PdpeSrc;
1110 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1111# else /* AMD64 */
1112 PX86PML4E pPml4eSrc;
1113 X86PDPE PdpeSrc;
1114 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1115# endif
1116 GSTPDE PdeSrc;
1117
1118 if (pPDSrc)
1119 PdeSrc = pPDSrc->a[iPDSrc];
1120 else
1121 PdeSrc.u = 0;
1122# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1123
1124# if PGM_GST_TYPE == PGM_TYPE_32BIT
1125 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1126# else
1127 const bool fIsBigPage = PdeSrc.b.u1Size;
1128# endif
1129
1130# ifdef IN_RING3
1131 /*
1132 * If a CR3 Sync is pending we may ignore the invalidate page operation
1133 * depending on the kind of sync and if it's a global page or not.
1134 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1135 */
1136# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1137 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1138 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1139 && fIsBigPage
1140 && PdeSrc.b.u1Global
1141 )
1142 )
1143# else
1144 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1145# endif
1146 {
1147 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1148 return VINF_SUCCESS;
1149 }
1150# endif /* IN_RING3 */
1151
1152 /*
1153 * Deal with the Guest PDE.
1154 */
1155 rc = VINF_SUCCESS;
1156 if (PdeSrc.n.u1Present)
1157 {
1158 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1159 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1160# ifndef PGM_WITHOUT_MAPPING
1161 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1162 {
1163 /*
1164 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1165 */
1166 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1167 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1168 pgmLock(pVM);
1169 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1170 pgmUnlock(pVM);
1171 }
1172 else
1173# endif /* !PGM_WITHOUT_MAPPING */
1174 if (!fIsBigPage)
1175 {
1176 /*
1177 * 4KB - page.
1178 */
1179 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1180 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1181
1182# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1183 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1184 if (pShwPage->cModifications)
1185 pShwPage->cModifications = 1;
1186# endif
1187
1188# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1189 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1190 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1191# endif
1192 if (pShwPage->GCPhys == GCPhys)
1193 {
1194# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1195 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1196 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1197 if (pPT->a[iPTEDst].n.u1Present)
1198 {
1199 /* This is very unlikely with caching/monitoring enabled. */
1200 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1201 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1202 }
1203# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1204 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1205 if (RT_SUCCESS(rc))
1206 rc = VINF_SUCCESS;
1207# endif
1208 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1209 PGM_INVL_PG(pVCpu, GCPtrPage);
1210 }
1211 else
1212 {
1213 /*
1214 * The page table address changed.
1215 */
1216 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1217 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1218 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1219 ASMAtomicWriteSize(pPdeDst, 0);
1220 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1221 PGM_INVL_VCPU_TLBS(pVCpu);
1222 }
1223 }
1224 else
1225 {
1226 /*
1227 * 2/4MB - page.
1228 */
1229 /* Before freeing the page, check if anything really changed. */
1230 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1231 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1232# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1233 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1234 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1235# endif
1236 if ( pShwPage->GCPhys == GCPhys
1237 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1238 {
1239 /* ASSUMES a the given bits are identical for 4M and normal PDEs */
1240 /** @todo PAT */
1241 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1242 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1243 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1244 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1245 {
1246 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1247 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1248# if defined(IN_RC)
1249 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1250 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1251# endif
1252 return VINF_SUCCESS;
1253 }
1254 }
1255
1256 /*
1257 * Ok, the page table is present and it's been changed in the guest.
1258 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1259 * We could do this for some flushes in GC too, but we need an algorithm for
1260 * deciding which 4MB pages containing code likely to be executed very soon.
1261 */
1262 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1263 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1264 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1265 ASMAtomicWriteSize(pPdeDst, 0);
1266 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1267 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1268 }
1269 }
1270 else
1271 {
1272 /*
1273 * Page directory is not present, mark shadow PDE not present.
1274 */
1275 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1276 {
1277 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1278 ASMAtomicWriteSize(pPdeDst, 0);
1279 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1280 PGM_INVL_PG(pVCpu, GCPtrPage);
1281 }
1282 else
1283 {
1284 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1285 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1286 }
1287 }
1288# if defined(IN_RC)
1289 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1290 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1291# endif
1292 return rc;
1293
1294#else /* guest real and protected mode */
1295 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1296 return VINF_SUCCESS;
1297#endif
1298}
1299
1300
1301/**
1302 * Update the tracking of shadowed pages.
1303 *
1304 * @param pVCpu The VMCPU handle.
1305 * @param pShwPage The shadow page.
1306 * @param HCPhys The physical page we is being dereferenced.
1307 */
1308DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1309{
1310 PVM pVM = pVCpu->CTX_SUFF(pVM);
1311
1312 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1313 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1314
1315 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1316 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1317 * 2. write protect all shadowed pages. I.e. implement caching.
1318 */
1319 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1320
1321 /*
1322 * Find the guest address.
1323 */
1324 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1325 pRam;
1326 pRam = pRam->CTX_SUFF(pNext))
1327 {
1328 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1329 while (iPage-- > 0)
1330 {
1331 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1332 {
1333 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1334
1335 Assert(pShwPage->cPresent);
1336 Assert(pPool->cPresent);
1337 pShwPage->cPresent--;
1338 pPool->cPresent--;
1339
1340 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1341 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1342 return;
1343 }
1344 }
1345 }
1346
1347 for (;;)
1348 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1349}
1350
1351
1352/**
1353 * Update the tracking of shadowed pages.
1354 *
1355 * @param pVCpu The VMCPU handle.
1356 * @param pShwPage The shadow page.
1357 * @param u16 The top 16-bit of the pPage->HCPhys.
1358 * @param pPage Pointer to the guest page. this will be modified.
1359 * @param iPTDst The index into the shadow table.
1360 */
1361DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1362{
1363 PVM pVM = pVCpu->CTX_SUFF(pVM);
1364 /*
1365 * Just deal with the simple first time here.
1366 */
1367 if (!u16)
1368 {
1369 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1370 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1371 }
1372 else
1373 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1374
1375 /* write back */
1376 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1377 PGM_PAGE_SET_TRACKING(pPage, u16);
1378
1379 /* update statistics. */
1380 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1381 pShwPage->cPresent++;
1382 if (pShwPage->iFirstPresent > iPTDst)
1383 pShwPage->iFirstPresent = iPTDst;
1384}
1385
1386
1387/**
 * Builds one 4K shadow page table entry from a guest page table entry.
 *
 * Outline, as implemented below:
 *   - Guest PTE not present: any tracking reference held by the old shadow
 *     PTE is dropped and the shadow PTE is cleared.
 *   - Guest PTE present but its physical address cannot be resolved by
 *     pgmPhysGetPageEx (MMIO or invalid page): the shadow PTE is left
 *     untouched; the #PF handler has to deal with such pages.
 *   - Ballooned page: the function returns without touching the shadow PTE.
 *   - Otherwise a new shadow PTE is composed (not present, read-only or a
 *     copy of the guest flags, depending on access handlers and the guest
 *     accessed/dirty bits), the pool tracking is updated for the old and
 *     new entry, and the new value is written with ASMAtomicWriteSize.
 *
 * For 4M pages the caller must convert the PDE4M to a PTE; this includes
 * adjusting the physical address.  Only the flags of the PdeSrc argument are
 * used.  No page structure will be mapped in this function.
 *
 * @param   pVCpu       The VMCPU handle.
 * @param   pPteDst     Destination page table entry.
 * @param   PdeSrc      Source page directory entry (i.e. Guest OS page directory entry).
 *                      Can safely assume that only the flags are being used.
 * @param   PteSrc      Source page table entry (i.e. Guest OS page table entry).
 * @param   pShwPage    Pointer to the shadow page.
 * @param   iPTDst      The index into the shadow table.
 *
 * @remark  Not used for 2/4MB pages!
 */
1404DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1405{
1406 if (PteSrc.n.u1Present)
1407 {
1408 PVM pVM = pVCpu->CTX_SUFF(pVM);
1409
 /* When the shadow page table is flagged dirty, also store the guest PTE in the pool's aDirtyPages copy for this page. */
1410# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1411 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1412 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1413 if (pShwPage->fDirty)
1414 {
1415 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1416 PX86PTPAE pGstPT;
1417
1418 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1419 pGstPT->a[iPTDst].u = PteSrc.u;
1420 }
1421# endif
1422 /*
1423 * Find the ram range.
1424 */
1425 PPGMPAGE pPage;
1426 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1427 if (RT_SUCCESS(rc))
1428 {
1429 /* Ignore ballooned pages. Don't return errors or use a fatal assert here as part of a shadow sync range might include ballooned pages. */
 /* NOTE(review): pPteDst is left exactly as it was on this path, even if it is currently present - confirm that is intended. */
1430 if (PGM_PAGE_IS_BALLOONED(pPage))
1431 return;
1432
1433#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1434 /* Try to make the page writable if necessary. */
 /* Applies to RAM pages that are zero pages, or that the guest maps writable while not in the ALLOCATED state. */
1435 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1436 && ( PGM_PAGE_IS_ZERO(pPage)
1437 || ( PteSrc.n.u1Write
1438 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1439# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1440 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1441# endif
1442 )
1443 )
1444 )
1445 {
 /* The status is only asserted on; the "only allocated pages are mapped writable" check further down write-protects the entry if the page did not reach the ALLOCATED state. */
1446 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1447 AssertRC(rc);
1448 }
1449#endif
1450
1451 /** @todo investigate PWT, PCD and PAT. */
1452 /*
1453 * Make page table entry.
1454 */
1455 SHWPTE PteDst;
1456 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1457 {
1458 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
 /* Handlers that are not "all access": map the page without write access.  "All access" handlers: mark it not present. */
1459 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1460 {
1461#if PGM_SHW_TYPE == PGM_TYPE_EPT
1462 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1463 PteDst.n.u1Present = 1;
1464 PteDst.n.u1Execute = 1;
1465 PteDst.n.u1IgnorePAT = 1;
1466 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1467 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1468#else
 /* Guest flags minus the address, AVL, PAT/PCD/PWT and RW bits, combined with the host physical address. */
1469 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1470 | PGM_PAGE_GET_HCPHYS(pPage);
1471#endif
1472 }
1473 else
1474 {
1475 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1476 PteDst.u = 0;
1477 }
1478 /** @todo count these two kinds. */
1479 }
1480 else
1481 {
 /* Note: with paging enabled, the if/else chain below ends in a dangling 'else' that binds to the block following the #endif. */
1482#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1483 /*
1484 * If the page or page directory entry is not marked accessed,
1485 * we mark the page not present.
1486 */
1487 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1488 {
1489 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1490 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1491 PteDst.u = 0;
1492 }
1493 else
1494 /*
1495 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1496 * when the page is modified.
1497 */
1498 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1499 {
1500 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1501 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1502 | PGM_PAGE_GET_HCPHYS(pPage)
1503 | PGM_PTFLAGS_TRACK_DIRTY;
1504 }
1505 else
1506#endif
1507 {
 /* Plain case: keep the guest flags (including RW) and substitute the host physical address. */
1508 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1509#if PGM_SHW_TYPE == PGM_TYPE_EPT
1510 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1511 PteDst.n.u1Present = 1;
1512 PteDst.n.u1Write = 1;
1513 PteDst.n.u1Execute = 1;
1514 PteDst.n.u1IgnorePAT = 1;
1515 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1516 /* PteDst.n.u1Size = 0 */
1517#else
1518 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1519 | PGM_PAGE_GET_HCPHYS(pPage);
1520#endif
1521 }
1522 }
1523
1524 /*
1525 * Make sure only allocated pages are mapped writable.
1526 */
1527 if ( PteDst.n.u1Write
1528 && PteDst.n.u1Present
1529 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1530 {
1531 /* Still applies to shared pages. */
1532 Assert(!PGM_PAGE_IS_ZERO(pPage));
1533 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1534 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1535 }
1536
1537 /*
1538 * Keep user track up to date.
1539 */
 /* Compares the old entry (*pPteDst) with the new one (PteDst): add a reference for a newly present page, swap references when the page frame changes, and drop the reference when the entry becomes not present. */
1540 if (PteDst.n.u1Present)
1541 {
1542 if (!pPteDst->n.u1Present)
1543 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1544 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1545 {
1546 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1547 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1548 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1549 }
1550 }
1551 else if (pPteDst->n.u1Present)
1552 {
1553 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1554 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1555 }
1556
1557 /*
1558 * Update statistics and commit the entry.
1559 */
1560#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1561 if (!PteSrc.n.u1Global)
1562 pShwPage->fSeenNonGlobal = true;
1563#endif
 /* The tracking above reads the old *pPteDst, so the new value is written only now. */
1564 ASMAtomicWriteSize(pPteDst, PteDst.u);
1565 }
1566 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1567 /** @todo count these. */
1568 }
1569 else
1570 {
1571 /*
1572 * Page not-present.
1573 */
1574 Log2(("SyncPageWorker: page not present in Pte\n"));
1575 /* Keep user track up to date. */
1576 if (pPteDst->n.u1Present)
1577 {
1578 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1579 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1580 }
1581 ASMAtomicWriteSize(pPteDst, 0);
1582 /** @todo count these. */
1583 }
1584}
1585
1586
1587/**
1588 * Syncs a guest OS page.
1589 *
1590 * There are no conflicts at this point, neither is there any need for
1591 * page table allocations.
1592 *
1593 * @returns VBox status code.
1594 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1595 * @param pVCpu The VMCPU handle.
1596 * @param PdeSrc Page directory entry of the guest.
1597 * @param GCPtrPage Guest context page address.
1598 * @param cPages Number of pages to sync (PGM_SYNC_NR_PAGES) (default=1).
1599 * @param uErr Fault error (X86_TRAP_PF_*).
1600 */
1601PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1602{
1603 PVM pVM = pVCpu->CTX_SUFF(pVM);
1604 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1605 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1606
1607 Assert(PGMIsLockOwner(pVM));
1608
1609#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1610 || PGM_GST_TYPE == PGM_TYPE_PAE \
1611 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1612 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1613 && PGM_SHW_TYPE != PGM_TYPE_EPT
1614
1615 /*
1616 * Assert preconditions.
1617 */
1618 Assert(PdeSrc.n.u1Present);
1619 Assert(cPages);
1620# if 0 /* rarely useful; leave for debugging. */
1621 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1622# endif
1623
1624 /*
1625 * Get the shadow PDE, find the shadow page table in the pool.
1626 */
1627# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1628 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1629 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1630
1631 /* Fetch the pgm pool shadow descriptor. */
1632 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1633 Assert(pShwPde);
1634
1635# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1636 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1637 PPGMPOOLPAGE pShwPde = NULL;
1638 PX86PDPAE pPDDst;
1639
1640 /* Fetch the pgm pool shadow descriptor. */
1641 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1642 AssertRCSuccessReturn(rc2, rc2);
1643 Assert(pShwPde);
1644
1645 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1646 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1647
1648# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1649 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1650 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1651 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1652 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1653
1654 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1655 AssertRCSuccessReturn(rc2, rc2);
1656 Assert(pPDDst && pPdptDst);
1657 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1658# endif
1659 SHWPDE PdeDst = *pPdeDst;
1660
1661 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1662 if (!PdeDst.n.u1Present)
1663 {
1664 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1665 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1666 return VINF_SUCCESS; /* force the instruction to be executed again. */
1667 }
1668
1669 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1670 Assert(pShwPage);
1671
1672# if PGM_GST_TYPE == PGM_TYPE_AMD64
1673 /* Fetch the pgm pool shadow descriptor. */
1674 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1675 Assert(pShwPde);
1676# endif
1677
1678# if defined(IN_RC)
1679 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1680 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1681# endif
1682
1683 /*
1684 * Check that the page is present and that the shadow PDE isn't out of sync.
1685 */
1686# if PGM_GST_TYPE == PGM_TYPE_32BIT
1687 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1688# else
1689 const bool fBigPage = PdeSrc.b.u1Size;
1690# endif
1691 RTGCPHYS GCPhys;
1692 if (!fBigPage)
1693 {
1694 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1695# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1696 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1697 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1698# endif
1699 }
1700 else
1701 {
1702 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1703# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1704 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1705 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1706# endif
1707 }
1708 if ( pShwPage->GCPhys == GCPhys
1709 && PdeSrc.n.u1Present
1710 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1711 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1712# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1713 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1714# endif
1715 )
1716 {
1717 /*
1718 * Check that the PDE is marked accessed already.
1719 * Since we set the accessed bit *before* getting here on a #PF, this
1720 * check is only meant for dealing with non-#PF'ing paths.
1721 */
1722 if (PdeSrc.n.u1Accessed)
1723 {
1724 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1725 if (!fBigPage)
1726 {
1727 /*
1728 * 4KB Page - Map the guest page table.
1729 */
1730 PGSTPT pPTSrc;
1731 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1732 if (RT_SUCCESS(rc))
1733 {
1734# ifdef PGM_SYNC_N_PAGES
1735 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1736 if ( cPages > 1
1737 && !(uErr & X86_TRAP_PF_P)
1738 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1739 {
1740 /*
1741 * This code path is currently only taken when the caller is PGMTrap0eHandler
1742 * for non-present pages!
1743 *
1744 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1745 * deal with locality.
1746 */
1747 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1748# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1749 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1750 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1751# else
1752 const unsigned offPTSrc = 0;
1753# endif
1754 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1755 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1756 iPTDst = 0;
1757 else
1758 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1759 for (; iPTDst < iPTDstEnd; iPTDst++)
1760 {
1761 if (!pPTDst->a[iPTDst].n.u1Present)
1762 {
1763 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1764 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1765 NOREF(GCPtrCurPage);
1766#ifndef IN_RING0
1767 /*
1768 * Assuming kernel code will be marked as supervisor - and not as user level
1769 * and executed using a conforming code selector - And marked as readonly.
1770 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1771 */
1772 PPGMPAGE pPage;
1773 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1774 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1775 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1776 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1777 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1778 )
1779#endif /* else: CSAM not active */
1780 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1781 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1782 GCPtrCurPage, PteSrc.n.u1Present,
1783 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1784 PteSrc.n.u1User & PdeSrc.n.u1User,
1785 (uint64_t)PteSrc.u,
1786 (uint64_t)pPTDst->a[iPTDst].u,
1787 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1788 }
1789 }
1790 }
1791 else
1792# endif /* PGM_SYNC_N_PAGES */
1793 {
1794 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1795 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1796 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1797 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1798 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1799 GCPtrPage, PteSrc.n.u1Present,
1800 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1801 PteSrc.n.u1User & PdeSrc.n.u1User,
1802 (uint64_t)PteSrc.u,
1803 (uint64_t)pPTDst->a[iPTDst].u,
1804 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1805 }
1806 }
1807 else /* MMIO or invalid page: emulated in #PF handler. */
1808 {
1809 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1810 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1811 }
1812 }
1813 else
1814 {
1815 /*
1816 * 4/2MB page - lazy syncing shadow 4K pages.
1817 * (There are many causes of getting here, it's no longer only CSAM.)
1818 */
1819 /* Calculate the GC physical address of this 4KB shadow page. */
1820 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1821 /* Find ram range. */
1822 PPGMPAGE pPage;
1823 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1824 if (RT_SUCCESS(rc))
1825 {
1826 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1827
1828# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1829 /* Try to make the page writable if necessary. */
1830 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1831 && ( PGM_PAGE_IS_ZERO(pPage)
1832 || ( PdeSrc.n.u1Write
1833 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1834# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1835 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1836# endif
1837 )
1838 )
1839 )
1840 {
1841 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1842 AssertRC(rc);
1843 }
1844# endif
1845
1846 /*
1847 * Make shadow PTE entry.
1848 */
1849 SHWPTE PteDst;
1850 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1851 | PGM_PAGE_GET_HCPHYS(pPage);
1852 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1853 {
1854 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1855 PteDst.n.u1Write = 0;
1856 else
1857 PteDst.u = 0;
1858 }
1859
1860 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1861 if ( PteDst.n.u1Present
1862 && !pPTDst->a[iPTDst].n.u1Present)
1863 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1864
1865 /* Make sure only allocated pages are mapped writable. */
1866 if ( PteDst.n.u1Write
1867 && PteDst.n.u1Present
1868 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1869 {
1870 /* Still applies to shared pages. */
1871 Assert(!PGM_PAGE_IS_ZERO(pPage));
1872 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1873 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1874 }
1875
1876 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1877
1878 /*
1879 * If the page is not flagged as dirty and is writable, then make it read-only
1880 * at PD level, so we can set the dirty bit when the page is modified.
1881 *
1882 * ASSUMES that page access handlers are implemented on page table entry level.
1883 * Thus we will first catch the dirty access and set PDE.D and restart. If
1884 * there is an access handler, we'll trap again and let it work on the problem.
1885 */
1886 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1887 * As for invlpg, it simply frees the whole shadow PT.
1888 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1889 if ( !PdeSrc.b.u1Dirty
1890 && PdeSrc.b.u1Write)
1891 {
1892 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1893 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1894 PdeDst.n.u1Write = 0;
1895 }
1896 else
1897 {
1898 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1899 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1900 }
1901 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1902 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1903 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1904 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1905 }
1906 else
1907 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1908 }
1909# if defined(IN_RC)
1910 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1911 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1912# endif
1913 return VINF_SUCCESS;
1914 }
1915 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1916 }
1917 else
1918 {
1919 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1920 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1921 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1922 }
1923
1924 /*
1925 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1926 * Yea, I'm lazy.
1927 */
1928 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1929 ASMAtomicWriteSize(pPdeDst, 0);
1930
1931# if defined(IN_RC)
1932 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1933 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1934# endif
1935 PGM_INVL_VCPU_TLBS(pVCpu);
1936 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1937
1938#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1939 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1940 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1941 && !defined(IN_RC)
1942
1943# ifdef PGM_SYNC_N_PAGES
1944 /*
1945 * Get the shadow PDE, find the shadow page table in the pool.
1946 */
1947# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1948 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1949
1950# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1951 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1952
1953# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1954 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1955 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1956 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1957 X86PDEPAE PdeDst;
1958 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1959
1960 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1961 AssertRCSuccessReturn(rc, rc);
1962 Assert(pPDDst && pPdptDst);
1963 PdeDst = pPDDst->a[iPDDst];
1964# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1965 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1966 PEPTPD pPDDst;
1967 EPTPDE PdeDst;
1968
1969 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1970 if (rc != VINF_SUCCESS)
1971 {
1972 AssertRC(rc);
1973 return rc;
1974 }
1975 Assert(pPDDst);
1976 PdeDst = pPDDst->a[iPDDst];
1977# endif
1978 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1979 if (!PdeDst.n.u1Present)
1980 {
1981 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
1982 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1983 return VINF_SUCCESS; /* force the instruction to be executed again. */
1984 }
1985
1986 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
1987 if (PdeDst.n.u1Size)
1988 {
1989 Assert(HWACCMIsNestedPagingActive(pVM));
1990 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
1991 return VINF_SUCCESS;
1992 }
1993
1994 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1995 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1996
1997 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1998 if ( cPages > 1
1999 && !(uErr & X86_TRAP_PF_P)
2000 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2001 {
2002 /*
2003 * This code path is currently only taken when the caller is PGMTrap0eHandler
2004 * for non-present pages!
2005 *
2006 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2007 * deal with locality.
2008 */
2009 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2010 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2011 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2012 iPTDst = 0;
2013 else
2014 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2015 for (; iPTDst < iPTDstEnd; iPTDst++)
2016 {
2017 if (!pPTDst->a[iPTDst].n.u1Present)
2018 {
2019 GSTPTE PteSrc;
2020
2021 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2022
2023 /* Fake the page table entry */
2024 PteSrc.u = GCPtrCurPage;
2025 PteSrc.n.u1Present = 1;
2026 PteSrc.n.u1Dirty = 1;
2027 PteSrc.n.u1Accessed = 1;
2028 PteSrc.n.u1Write = 1;
2029 PteSrc.n.u1User = 1;
2030
2031 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2032
2033 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2034 GCPtrCurPage, PteSrc.n.u1Present,
2035 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2036 PteSrc.n.u1User & PdeSrc.n.u1User,
2037 (uint64_t)PteSrc.u,
2038 (uint64_t)pPTDst->a[iPTDst].u,
2039 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2040
2041 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2042 break;
2043 }
2044 else
2045 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2046 }
2047 }
2048 else
2049# endif /* PGM_SYNC_N_PAGES */
2050 {
2051 GSTPTE PteSrc;
2052 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2053 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2054
2055 /* Fake the page table entry */
2056 PteSrc.u = GCPtrCurPage;
2057 PteSrc.n.u1Present = 1;
2058 PteSrc.n.u1Dirty = 1;
2059 PteSrc.n.u1Accessed = 1;
2060 PteSrc.n.u1Write = 1;
2061 PteSrc.n.u1User = 1;
2062 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2063
2064 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2065 GCPtrPage, PteSrc.n.u1Present,
2066 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2067 PteSrc.n.u1User & PdeSrc.n.u1User,
2068 (uint64_t)PteSrc.u,
2069 (uint64_t)pPTDst->a[iPTDst].u,
2070 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2071 }
2072 return VINF_SUCCESS;
2073
2074#else
2075 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2076 return VERR_INTERNAL_ERROR;
2077#endif
2078}
2079
2080
2081#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2082/**
2083 * Investigate page fault and handle write protection page faults caused by
2084 * dirty bit tracking.
2085 *
2086 * @returns VBox status code.
2087 * @param pVCpu The VMCPU handle.
2088 * @param uErr Page fault error code.
2089 * @param pPdeSrc Guest page directory entry.
2090 * @param GCPtrPage Guest context page address.
2091 */
2092PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2093{
2094 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2095 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2096 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2097# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2098 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2099# endif
2100 unsigned uPageFaultLevel;
2101 int rc;
2102 PVM pVM = pVCpu->CTX_SUFF(pVM);
2103
2104 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2105
2106# if PGM_GST_TYPE == PGM_TYPE_PAE \
2107 || PGM_GST_TYPE == PGM_TYPE_AMD64
2108
2109# if PGM_GST_TYPE == PGM_TYPE_AMD64
2110 PX86PML4E pPml4eSrc;
2111 PX86PDPE pPdpeSrc;
2112
2113 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2114 Assert(pPml4eSrc);
2115
2116 /*
2117 * Real page fault? (PML4E level)
2118 */
2119 if ( (uErr & X86_TRAP_PF_RSVD)
2120 || !pPml4eSrc->n.u1Present
2121 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2122 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2123 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2124 )
2125 {
2126 uPageFaultLevel = 0;
2127 goto l_UpperLevelPageFault;
2128 }
2129 Assert(pPdpeSrc);
2130
2131# else /* PAE */
2132 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2133# endif /* PAE */
2134
2135 /*
2136 * Real page fault? (PDPE level)
2137 */
2138 if ( (uErr & X86_TRAP_PF_RSVD)
2139 || !pPdpeSrc->n.u1Present
2140# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2141 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2142 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2143 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2144# endif
2145 )
2146 {
2147 uPageFaultLevel = 1;
2148 goto l_UpperLevelPageFault;
2149 }
2150# endif
2151
2152 /*
2153 * Real page fault? (PDE level)
2154 */
2155 if ( (uErr & X86_TRAP_PF_RSVD)
2156 || !pPdeSrc->n.u1Present
2157 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2158# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2159 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2160# endif
2161 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2162 {
2163 uPageFaultLevel = 2;
2164 goto l_UpperLevelPageFault;
2165 }
2166
2167 /*
2168 * First check the easy case where the page directory has been marked read-only to track
2169 * the dirty bit of an emulated BIG page
2170 */
2171 if ( pPdeSrc->b.u1Size
2172# if PGM_GST_TYPE == PGM_TYPE_32BIT
2173 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2174# endif
2175 )
2176 {
2177 /* Mark guest page directory as accessed */
2178# if PGM_GST_TYPE == PGM_TYPE_AMD64
2179 pPml4eSrc->n.u1Accessed = 1;
2180 pPdpeSrc->lm.u1Accessed = 1;
2181# endif
2182 pPdeSrc->b.u1Accessed = 1;
2183
2184 /*
2185 * Only write protection page faults are relevant here.
2186 */
2187 if (fWriteFault)
2188 {
2189 /* Mark guest page directory as dirty (BIG page only). */
2190 pPdeSrc->b.u1Dirty = 1;
2191 }
2192 return VINF_SUCCESS;
2193 }
2194 /* else: 4KB page table */
2195
2196 /*
2197 * Map the guest page table.
2198 */
2199 PGSTPT pPTSrc;
2200 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2201 if (RT_SUCCESS(rc))
2202 {
2203 /*
2204 * Real page fault?
2205 */
2206 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2207 const GSTPTE PteSrc = *pPteSrc;
2208 if ( !PteSrc.n.u1Present
2209 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2210# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2211 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2212# endif
2213 || (fUserLevelFault && !PteSrc.n.u1User)
2214 )
2215 {
2216 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2217 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2218
2219 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2220 * See the 2nd case above as well.
2221 */
2222 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2223 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2224
2225 return VINF_EM_RAW_GUEST_TRAP;
2226 }
2227 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2228
2229 /*
2230 * Set the accessed bits in the page directory and the page table.
2231 */
2232# if PGM_GST_TYPE == PGM_TYPE_AMD64
2233 pPml4eSrc->n.u1Accessed = 1;
2234 pPdpeSrc->lm.u1Accessed = 1;
2235# endif
2236 pPdeSrc->n.u1Accessed = 1;
2237 pPteSrc->n.u1Accessed = 1;
2238
2239 /*
2240 * Only write protection page faults are relevant here.
2241 */
2242 if (fWriteFault)
2243 {
2244 /* Write access, so mark guest entry as dirty. */
2245# ifdef VBOX_WITH_STATISTICS
2246 if (!pPteSrc->n.u1Dirty)
2247 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2248 else
2249 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2250# endif
2251
2252 pPteSrc->n.u1Dirty = 1;
2253 }
2254 return VINF_SUCCESS;
2255 }
2256 AssertRC(rc);
2257 return rc;
2258
2259
2260l_UpperLevelPageFault:
2261 /*
2262 * Pagefault detected while checking the PML4E, PDPE or PDE.
2263 * Single exit handler to get rid of duplicate code paths.
2264 */
2265 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2266 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2267
2268 if ( 1
2269# if PGM_GST_TYPE == PGM_TYPE_AMD64
2270 && pPml4eSrc->n.u1Present
2271# endif
2272# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2273 && pPdpeSrc->n.u1Present
2274# endif
2275 && pPdeSrc->n.u1Present)
2276 {
2277 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2278 if ( pPdeSrc->b.u1Size
2279# if PGM_GST_TYPE == PGM_TYPE_32BIT
2280 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2281# endif
2282 )
2283 {
2284 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2285 }
2286 else
2287 {
2288 /*
2289 * Map the guest page table.
2290 */
2291 PGSTPT pPTSrc2;
2292 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2293 if (RT_SUCCESS(rc))
2294 {
2295 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2296 if (pPteSrc->n.u1Present)
2297 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2298 }
2299 AssertRC(rc);
2300 }
2301 }
2302 return VINF_EM_RAW_GUEST_TRAP;
2303}
2304
2305/**
2306 * Handle dirty bit tracking faults.
2307 *
2308 * @returns VBox status code.
2309 * @param pVCpu The VMCPU handle.
2310 * @param uErr Page fault error code.
2311 * @param pPdeSrc Guest page directory entry.
2312 * @param pPdeDst Shadow page directory entry.
2313 * @param GCPtrPage Guest context page address.
2314 */
2315PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2316{
2317# if PGM_GST_TYPE == PGM_TYPE_32BIT
2318 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2319# else
2320 const bool fBigPagesSupported = true;
2321# endif
2322 PVM pVM = pVCpu->CTX_SUFF(pVM);
2323 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2324
2325 Assert(PGMIsLockOwner(pVM));
2326
2327 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2328 {
2329 if ( pPdeDst->n.u1Present
2330 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2331 {
2332 SHWPDE PdeDst = *pPdeDst;
2333
2334 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2335 Assert(pPdeSrc->b.u1Write);
2336
2337 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2338 * fault again and take this path to only invalidate the entry.
2339 */
2340 PdeDst.n.u1Write = 1;
2341 PdeDst.n.u1Accessed = 1;
2342 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2343 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2344 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2345 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2346 }
2347# ifdef IN_RING0
2348 else
2349 /* Check for stale TLB entry; only applies to the SMP guest case. */
2350 if ( pVM->cCpus > 1
2351 && pPdeDst->n.u1Write
2352 && pPdeDst->n.u1Accessed)
2353 {
2354 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2355 if (pShwPage)
2356 {
2357 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2358 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2359 if ( pPteDst->n.u1Present
2360 && pPteDst->n.u1Write)
2361 {
2362 /* Stale TLB entry. */
2363 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2364 PGM_INVL_PG(pVCpu, GCPtrPage);
2365 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2366 }
2367 }
2368 }
2369# endif /* IN_RING0 */
2370 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2371 }
2372
2373 /*
2374 * Map the guest page table.
2375 */
2376 PGSTPT pPTSrc;
2377 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2378 if (RT_SUCCESS(rc))
2379 {
2380 if (pPdeDst->n.u1Present)
2381 {
2382 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2383 const GSTPTE PteSrc = *pPteSrc;
2384#ifndef IN_RING0
2385 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2386 * Our individual shadow handlers will provide more information and force a fatal exit.
2387 */
2388 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2389 {
2390 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2391 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2392 }
2393#endif
2394 /*
2395 * Map shadow page table.
2396 */
2397 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2398 if (pShwPage)
2399 {
2400 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2401 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2402 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2403 {
2404 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2405 {
2406 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2407 SHWPTE PteDst = *pPteDst;
2408
2409 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2410 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2411
2412 Assert(pPteSrc->n.u1Write);
2413
2414 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2415 * fault again and take this path to only invalidate the entry.
2416 */
2417 if (RT_LIKELY(pPage))
2418 {
2419 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2420 {
2421 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2422 PteDst.n.u1Write = 0;
2423 }
2424 else
2425 {
2426 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2427 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2428 {
2429 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2430 AssertRC(rc);
2431 }
2432 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2433 {
2434 PteDst.n.u1Write = 1;
2435 }
2436 else
2437 {
2438 /* Still applies to shared pages. */
2439 Assert(!PGM_PAGE_IS_ZERO(pPage));
2440 PteDst.n.u1Write = 0;
2441 }
2442 }
2443 }
2444 else
2445 PteDst.n.u1Write = 1;
2446
2447 PteDst.n.u1Dirty = 1;
2448 PteDst.n.u1Accessed = 1;
2449 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2450 ASMAtomicWriteSize(pPteDst, PteDst.u);
2451 PGM_INVL_PG(pVCpu, GCPtrPage);
2452 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2453 }
2454# ifdef IN_RING0
2455 else
2456 /* Check for stale TLB entry; only applies to the SMP guest case. */
2457 if ( pVM->cCpus > 1
2458 && pPteDst->n.u1Write == 1
2459 && pPteDst->n.u1Accessed == 1)
2460 {
2461 /* Stale TLB entry. */
2462 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2463 PGM_INVL_PG(pVCpu, GCPtrPage);
2464 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2465 }
2466# endif
2467 }
2468 }
2469 else
2470 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2471 }
2472 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2473 }
2474 AssertRC(rc);
2475 return rc;
2476}
2477#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2478
2479
2480/**
2481 * Sync a shadow page table.
2482 *
2483 * The shadow page table is not present. This includes the case where
2484 * there is a conflict with a mapping.
2485 *
2486 * @returns VBox status code.
2487 * @param pVCpu The VMCPU handle.
2488 * @param iPDSrc Page directory index (into the guest page directory pPDSrc).
2489 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2490 * Assume this is a temporary mapping.
2491 * @param GCPtrPage GC Pointer of the page that caused the fault
2492 */
2493PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2494{
2495 PVM pVM = pVCpu->CTX_SUFF(pVM);
2496 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2497
2498 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2499#if 0 /* rarely useful; leave for debugging. */
2500 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2501#endif
2502 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2503
2504 Assert(PGMIsLocked(pVM));
2505
2506#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2507 || PGM_GST_TYPE == PGM_TYPE_PAE \
2508 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2509 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2510 && PGM_SHW_TYPE != PGM_TYPE_EPT
2511
2512 int rc = VINF_SUCCESS;
2513
2514 /*
2515 * Validate input a little bit.
2516 */
2517 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2518# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2519 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2520 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2521
2522 /* Fetch the pgm pool shadow descriptor. */
2523 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2524 Assert(pShwPde);
2525
2526# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2527 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2528 PPGMPOOLPAGE pShwPde = NULL;
2529 PX86PDPAE pPDDst;
2530 PSHWPDE pPdeDst;
2531
2532 /* Fetch the pgm pool shadow descriptor. */
2533 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2534 AssertRCSuccessReturn(rc, rc);
2535 Assert(pShwPde);
2536
2537 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2538 pPdeDst = &pPDDst->a[iPDDst];
2539
2540# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2541 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2542 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2543 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2544 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2545 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2546 AssertRCSuccessReturn(rc, rc);
2547 Assert(pPDDst);
2548 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2549# endif
2550 SHWPDE PdeDst = *pPdeDst;
2551
2552# if PGM_GST_TYPE == PGM_TYPE_AMD64
2553 /* Fetch the pgm pool shadow descriptor. */
2554 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2555 Assert(pShwPde);
2556# endif
2557
2558# ifndef PGM_WITHOUT_MAPPINGS
2559 /*
2560 * Check for conflicts.
2561 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2562 * HC: Simply resolve the conflict.
2563 */
2564 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2565 {
2566 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2567# ifndef IN_RING3
2568 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2569 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2570 return VERR_ADDRESS_CONFLICT;
2571# else
2572 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2573 Assert(pMapping);
2574# if PGM_GST_TYPE == PGM_TYPE_32BIT
2575 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2576# elif PGM_GST_TYPE == PGM_TYPE_PAE
2577 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2578# else
2579 AssertFailed(); /* can't happen for amd64 */
2580# endif
2581 if (RT_FAILURE(rc))
2582 {
2583 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2584 return rc;
2585 }
2586 PdeDst = *pPdeDst;
2587# endif
2588 }
2589# endif /* !PGM_WITHOUT_MAPPINGS */
2590 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2591
2592# if defined(IN_RC)
2593 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2594 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2595# endif
2596
2597 /*
2598 * Sync page directory entry.
2599 */
2600 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2601 if (PdeSrc.n.u1Present)
2602 {
2603 /*
2604 * Allocate & map the page table.
2605 */
2606 PSHWPT pPTDst;
2607# if PGM_GST_TYPE == PGM_TYPE_32BIT
2608 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2609# else
2610 const bool fPageTable = !PdeSrc.b.u1Size;
2611# endif
2612 PPGMPOOLPAGE pShwPage;
2613 RTGCPHYS GCPhys;
2614 if (fPageTable)
2615 {
2616 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2617# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2618 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2619 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2620# endif
2621 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2622 }
2623 else
2624 {
2625 PGMPOOLACCESS enmAccess;
2626# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2627 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2628# else
2629 const bool fNoExecute = false;
2630# endif
2631
2632 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2633# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2634 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2635 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2636# endif
2637 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2638 if (PdeSrc.n.u1User)
2639 {
2640 if (PdeSrc.n.u1Write)
2641 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2642 else
2643 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2644 }
2645 else
2646 {
2647 if (PdeSrc.n.u1Write)
2648 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2649 else
2650 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2651 }
2652 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2653 }
2654 if (rc == VINF_SUCCESS)
2655 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2656 else if (rc == VINF_PGM_CACHED_PAGE)
2657 {
2658 /*
2659 * The PT was cached, just hook it up.
2660 */
2661 if (fPageTable)
2662 PdeDst.u = pShwPage->Core.Key
2663 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2664 else
2665 {
2666 PdeDst.u = pShwPage->Core.Key
2667 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2668 /* (see explanation and assumptions further down.) */
2669 if ( !PdeSrc.b.u1Dirty
2670 && PdeSrc.b.u1Write)
2671 {
2672 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2673 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2674 PdeDst.b.u1Write = 0;
2675 }
2676 }
2677 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2678# if defined(IN_RC)
2679 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2680# endif
2681 return VINF_SUCCESS;
2682 }
2683 else if (rc == VERR_PGM_POOL_FLUSHED)
2684 {
2685 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2686# if defined(IN_RC)
2687 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2688# endif
2689 return VINF_PGM_SYNC_CR3;
2690 }
2691 else
2692 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2693 PdeDst.u &= X86_PDE_AVL_MASK;
2694 PdeDst.u |= pShwPage->Core.Key;
2695
2696 /*
2697 * Page directory has been accessed (this is a fault situation, remember).
2698 */
2699 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2700 if (fPageTable)
2701 {
2702 /*
2703 * Page table - 4KB.
2704 *
2705 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2706 */
2707 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2708 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2709 PGSTPT pPTSrc;
2710 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2711 if (RT_SUCCESS(rc))
2712 {
2713 /*
2714 * Start by syncing the page directory entry so CSAM's TLB trick works.
2715 */
2716 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2717 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2718 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2719# if defined(IN_RC)
2720 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2721# endif
2722
2723 /*
2724 * Directory/page user or supervisor privilege: (same goes for read/write)
2725 *
2726 * Directory Page Combined
2727 * U/S U/S U/S
2728 * 0 0 0
2729 * 0 1 0
2730 * 1 0 0
2731 * 1 1 1
2732 *
2733 * Simple AND operation. Table listed for completeness.
2734 *
2735 */
2736 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2737# ifdef PGM_SYNC_N_PAGES
2738 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2739 unsigned iPTDst = iPTBase;
2740 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2741 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2742 iPTDst = 0;
2743 else
2744 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2745# else /* !PGM_SYNC_N_PAGES */
2746 unsigned iPTDst = 0;
2747 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2748# endif /* !PGM_SYNC_N_PAGES */
2749# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2750 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2751 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2752# else
2753 const unsigned offPTSrc = 0;
2754# endif
2755 for (; iPTDst < iPTDstEnd; iPTDst++)
2756 {
2757 const unsigned iPTSrc = iPTDst + offPTSrc;
2758 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2759
2760 if (PteSrc.n.u1Present) /* we've already cleared it above */
2761 {
2762# ifndef IN_RING0
2763 /*
2764 * Assuming kernel code will be marked as supervisor - and not as user level
2765 * and executed using a conforming code selector - And marked as readonly.
2766 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2767 */
2768 PPGMPAGE pPage;
2769 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2770 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2771 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2772 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2773 )
2774# endif
2775 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2776 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2777 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2778 PteSrc.n.u1Present,
2779 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2780 PteSrc.n.u1User & PdeSrc.n.u1User,
2781 (uint64_t)PteSrc.u,
2782 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2783 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2784 }
2785 } /* for PTEs */
2786 }
2787 }
2788 else
2789 {
2790 /*
2791 * Big page - 2/4MB.
2792 *
2793 * We'll walk the ram range list in parallel and optimize lookups.
2794 * We will only sync one shadow page table at a time.
2795 */
2796 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2797
2798 /**
2799 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2800 */
2801
2802 /*
2803 * Start by syncing the page directory entry.
2804 */
2805 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2806 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2807
2808 /*
2809 * If the page is not flagged as dirty and is writable, then make it read-only
2810 * at PD level, so we can set the dirty bit when the page is modified.
2811 *
2812 * ASSUMES that page access handlers are implemented on page table entry level.
2813 * Thus we will first catch the dirty access and set PDE.D and restart. If
2814 * there is an access handler, we'll trap again and let it work on the problem.
2815 */
2816 /** @todo move the above stuff to a section in the PGM documentation. */
2817 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2818 if ( !PdeSrc.b.u1Dirty
2819 && PdeSrc.b.u1Write)
2820 {
2821 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2822 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2823 PdeDst.b.u1Write = 0;
2824 }
2825 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2826# if defined(IN_RC)
2827 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2828# endif
2829
2830 /*
2831 * Fill the shadow page table.
2832 */
2833 /* Get address and flags from the source PDE. */
2834 SHWPTE PteDstBase;
2835 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2836
2837 /* Loop thru the entries in the shadow PT. */
2838 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2839 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2840 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2841 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2842 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2843 unsigned iPTDst = 0;
2844 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2845 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2846 {
2847 /* Advance ram range list. */
2848 while (pRam && GCPhys > pRam->GCPhysLast)
2849 pRam = pRam->CTX_SUFF(pNext);
2850 if (pRam && GCPhys >= pRam->GCPhys)
2851 {
2852 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2853 do
2854 {
2855 /* Make shadow PTE. */
2856 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2857 SHWPTE PteDst;
2858
2859# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2860 /* Try to make the page writable if necessary. */
2861 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2862 && ( PGM_PAGE_IS_ZERO(pPage)
2863 || ( PteDstBase.n.u1Write
2864 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2865# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2866 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2867# endif
2868 && !PGM_PAGE_IS_BALLOONED(pPage))
2869 )
2870 )
2871 {
2872 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2873 AssertRCReturn(rc, rc);
2874 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2875 break;
2876 }
2877# endif
2878
2879 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2880 {
2881 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2882 {
2883 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2884 PteDst.n.u1Write = 0;
2885 }
2886 else
2887 PteDst.u = 0;
2888 }
2889 else
2890 if (PGM_PAGE_IS_BALLOONED(pPage))
2891 {
2892 /* Skip ballooned pages. */
2893 PteDst.u = 0;
2894 }
2895# ifndef IN_RING0
2896 /*
2897 * Assuming kernel code will be marked as supervisor and not as user level and executed
2898 * using a conforming code selector. Don't check for readonly, as that implies the whole
2899 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2900 */
2901 else if ( !PdeSrc.n.u1User
2902 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2903 PteDst.u = 0;
2904# endif
2905 else
2906 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2907
2908 /* Only map writable pages writable. */
2909 if ( PteDst.n.u1Write
2910 && PteDst.n.u1Present
2911 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2912 {
2913 /* Still applies to shared pages. */
2914 Assert(!PGM_PAGE_IS_ZERO(pPage));
2915 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2916 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2917 }
2918
2919 if (PteDst.n.u1Present)
2920 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2921
2922 /* commit it */
2923 pPTDst->a[iPTDst] = PteDst;
2924 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2925 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2926 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2927
2928 /* advance */
2929 GCPhys += PAGE_SIZE;
2930 iHCPage++;
2931 iPTDst++;
2932 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2933 && GCPhys <= pRam->GCPhysLast);
2934 }
2935 else if (pRam)
2936 {
2937 Log(("Invalid pages at %RGp\n", GCPhys));
2938 do
2939 {
2940 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2941 GCPhys += PAGE_SIZE;
2942 iPTDst++;
2943 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2944 && GCPhys < pRam->GCPhys);
2945 }
2946 else
2947 {
2948 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2949 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2950 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2951 }
2952 } /* while more PTEs */
2953 } /* 4KB / 4MB */
2954 }
2955 else
2956 AssertRelease(!PdeDst.n.u1Present);
2957
2958 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2959 if (RT_FAILURE(rc))
2960 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2961 return rc;
2962
2963#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2964 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2965 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2966 && !defined(IN_RC)
2967
2968 /*
2969 * Validate input a little bit.
2970 */
2971 int rc = VINF_SUCCESS;
2972# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2973 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2974 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2975
2976 /* Fetch the pgm pool shadow descriptor. */
2977 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2978 Assert(pShwPde);
2979
2980# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2981 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2982 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2983 PX86PDPAE pPDDst;
2984 PSHWPDE pPdeDst;
2985
2986 /* Fetch the pgm pool shadow descriptor. */
2987 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2988 AssertRCSuccessReturn(rc, rc);
2989 Assert(pShwPde);
2990
2991 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2992 pPdeDst = &pPDDst->a[iPDDst];
2993
2994# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2995 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2996 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2997 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2998 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
2999 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3000 AssertRCSuccessReturn(rc, rc);
3001 Assert(pPDDst);
3002 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3003
3004 /* Fetch the pgm pool shadow descriptor. */
3005 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3006 Assert(pShwPde);
3007
3008# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3009 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3010 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3011 PEPTPD pPDDst;
3012 PEPTPDPT pPdptDst;
3013
3014 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3015 if (rc != VINF_SUCCESS)
3016 {
3017 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3018 AssertRC(rc);
3019 return rc;
3020 }
3021 Assert(pPDDst);
3022 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3023
3024 /* Fetch the pgm pool shadow descriptor. */
3025 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3026 Assert(pShwPde);
3027# endif
3028 SHWPDE PdeDst = *pPdeDst;
3029
3030 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3031 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3032
3033# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
3034# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
3035 if (HWACCMIsNestedPagingActive(pVM))
3036# endif
3037 {
3038 PPGMPAGE pPage;
3039
3040 /* Check if we allocated a big page before for this 2 MB range. */
3041 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3042 if (RT_SUCCESS(rc))
3043 {
3044 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3045
3046 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3047 {
3048 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3049 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3050 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3051 }
3052 else
3053 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3054 {
3055 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3056 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3057 if (RT_SUCCESS(rc))
3058 {
3059 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3060 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3061 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3062 }
3063 }
3064 else
3065 if (PGMIsUsingLargePages(pVM))
3066 {
3067 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3068 if (RT_SUCCESS(rc))
3069 {
3070 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3071 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3072 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3073 }
3074 else
3075 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3076 }
3077
3078 if (HCPhys != NIL_RTHCPHYS)
3079 {
3080 PdeDst.u &= X86_PDE_AVL_MASK;
3081 PdeDst.u |= HCPhys;
3082 PdeDst.n.u1Present = 1;
3083 PdeDst.n.u1Write = 1;
3084 PdeDst.b.u1Size = 1;
3085# if PGM_SHW_TYPE == PGM_TYPE_EPT
3086 PdeDst.n.u1Execute = 1;
3087 PdeDst.b.u1IgnorePAT = 1;
3088 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3089# else
3090 PdeDst.n.u1User = 1;
3091# endif
3092 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3093
3094 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3095 /* Add a reference to the first page only. */
3096 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3097
3098 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3099 return VINF_SUCCESS;
3100 }
3101 }
3102 }
3103# endif /* HC_ARCH_BITS == 64 */
3104
3105 GSTPDE PdeSrc;
3106 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3107 PdeSrc.n.u1Present = 1;
3108 PdeSrc.n.u1Write = 1;
3109 PdeSrc.n.u1Accessed = 1;
3110 PdeSrc.n.u1User = 1;
3111
3112 /*
3113 * Allocate & map the page table.
3114 */
3115 PSHWPT pPTDst;
3116 PPGMPOOLPAGE pShwPage;
3117 RTGCPHYS GCPhys;
3118
3119 /* Virtual address = physical address */
3120 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3121 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3122
3123 if ( rc == VINF_SUCCESS
3124 || rc == VINF_PGM_CACHED_PAGE)
3125 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3126 else
3127 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3128
3129 PdeDst.u &= X86_PDE_AVL_MASK;
3130 PdeDst.u |= pShwPage->Core.Key;
3131 PdeDst.n.u1Present = 1;
3132 PdeDst.n.u1Write = 1;
3133# if PGM_SHW_TYPE == PGM_TYPE_EPT
3134 PdeDst.n.u1Execute = 1;
3135# else
3136 PdeDst.n.u1User = 1;
3137 PdeDst.n.u1Accessed = 1;
3138# endif
3139 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3140
3141 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3142 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3143 return rc;
3144
3145#else
3146 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3147 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3148 return VERR_INTERNAL_ERROR;
3149#endif
3150}
3151
3152
3153
3154/**
3155 * Prefetch a page/set of pages.
3156 *
3157 * Typically used to sync commonly used pages before entering raw mode
3158 * after a CR3 reload.
3159 *
3160 * @returns VBox status code.
3161 * @param pVCpu The VMCPU handle.
3162 * @param GCPtrPage Page to invalidate.
3163 */
3164PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3165{
3166#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3167 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3168 /*
3169 * Check that all Guest levels thru the PDE are present, getting the
3170 * PD and PDE in the processes.
3171 */
3172 int rc = VINF_SUCCESS;
3173# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3174# if PGM_GST_TYPE == PGM_TYPE_32BIT
3175 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3176 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3177# elif PGM_GST_TYPE == PGM_TYPE_PAE
3178 unsigned iPDSrc;
3179 X86PDPE PdpeSrc;
3180 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3181 if (!pPDSrc)
3182 return VINF_SUCCESS; /* not present */
3183# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3184 unsigned iPDSrc;
3185 PX86PML4E pPml4eSrc;
3186 X86PDPE PdpeSrc;
3187 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3188 if (!pPDSrc)
3189 return VINF_SUCCESS; /* not present */
3190# endif
3191 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3192# else
3193 PGSTPD pPDSrc = NULL;
3194 const unsigned iPDSrc = 0;
3195 GSTPDE PdeSrc;
3196
3197 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3198 PdeSrc.n.u1Present = 1;
3199 PdeSrc.n.u1Write = 1;
3200 PdeSrc.n.u1Accessed = 1;
3201 PdeSrc.n.u1User = 1;
3202# endif
3203
3204 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3205 {
3206 PVM pVM = pVCpu->CTX_SUFF(pVM);
3207 pgmLock(pVM);
3208
3209# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3210 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3211# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3212 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3213 PX86PDPAE pPDDst;
3214 X86PDEPAE PdeDst;
3215# if PGM_GST_TYPE != PGM_TYPE_PAE
3216 X86PDPE PdpeSrc;
3217
3218 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3219 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3220# endif
3221 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3222 if (rc != VINF_SUCCESS)
3223 {
3224 pgmUnlock(pVM);
3225 AssertRC(rc);
3226 return rc;
3227 }
3228 Assert(pPDDst);
3229 PdeDst = pPDDst->a[iPDDst];
3230
3231# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3232 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3233 PX86PDPAE pPDDst;
3234 X86PDEPAE PdeDst;
3235
3236# if PGM_GST_TYPE == PGM_TYPE_PROT
3237 /* AMD-V nested paging */
3238 X86PML4E Pml4eSrc;
3239 X86PDPE PdpeSrc;
3240 PX86PML4E pPml4eSrc = &Pml4eSrc;
3241
3242 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3243 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3244 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3245# endif
3246
3247 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3248 if (rc != VINF_SUCCESS)
3249 {
3250 pgmUnlock(pVM);
3251 AssertRC(rc);
3252 return rc;
3253 }
3254 Assert(pPDDst);
3255 PdeDst = pPDDst->a[iPDDst];
3256# endif
3257 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3258 {
3259 if (!PdeDst.n.u1Present)
3260 {
3261 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3262 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3263 }
3264 else
3265 {
3266 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3267 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3268 * makes no sense to prefetch more than one page.
3269 */
3270 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3271 if (RT_SUCCESS(rc))
3272 rc = VINF_SUCCESS;
3273 }
3274 }
3275 pgmUnlock(pVM);
3276 }
3277 return rc;
3278
3279#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3280 return VINF_SUCCESS; /* ignore */
3281#endif
3282}
3283
3284
3285
3286
3287/**
3288 * Syncs a page during a PGMVerifyAccess() call.
3289 *
3290 * @returns VBox status code (informational included).
3291 * @param pVCpu The VMCPU handle.
3292 * @param GCPtrPage The address of the page to sync.
3293 * @param fPage The effective guest page flags.
3294 * @param uErr The trap error code.
3295 */
3296PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3297{
3298 PVM pVM = pVCpu->CTX_SUFF(pVM);
3299
3300 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3301
3302 Assert(!HWACCMIsNestedPagingActive(pVM));
3303#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_TYPE_AMD64) \
3304 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3305
3306# ifndef IN_RING0
3307 if (!(fPage & X86_PTE_US))
3308 {
3309 /*
3310 * Mark this page as safe.
3311 */
3312 /** @todo not correct for pages that contain both code and data!! */
3313 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3314 CSAMMarkPage(pVM, GCPtrPage, true);
3315 }
3316# endif
3317
3318 /*
3319 * Get guest PD and index.
3320 */
3321# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3322# if PGM_GST_TYPE == PGM_TYPE_32BIT
3323 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3324 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3325# elif PGM_GST_TYPE == PGM_TYPE_PAE
3326 unsigned iPDSrc = 0;
3327 X86PDPE PdpeSrc;
3328 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3329
3330 if (pPDSrc)
3331 {
3332 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3333 return VINF_EM_RAW_GUEST_TRAP;
3334 }
3335# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3336 unsigned iPDSrc;
3337 PX86PML4E pPml4eSrc;
3338 X86PDPE PdpeSrc;
3339 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3340 if (!pPDSrc)
3341 {
3342 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3343 return VINF_EM_RAW_GUEST_TRAP;
3344 }
3345# endif
3346# else
3347 PGSTPD pPDSrc = NULL;
3348 const unsigned iPDSrc = 0;
3349# endif
3350 int rc = VINF_SUCCESS;
3351
3352 pgmLock(pVM);
3353
3354 /*
3355 * First check if the shadow pd is present.
3356 */
3357# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3358 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3359# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3360 PX86PDEPAE pPdeDst;
3361 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3362 PX86PDPAE pPDDst;
3363# if PGM_GST_TYPE != PGM_TYPE_PAE
3364 X86PDPE PdpeSrc;
3365
3366 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3367 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3368# endif
3369 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3370 if (rc != VINF_SUCCESS)
3371 {
3372 pgmUnlock(pVM);
3373 AssertRC(rc);
3374 return rc;
3375 }
3376 Assert(pPDDst);
3377 pPdeDst = &pPDDst->a[iPDDst];
3378
3379# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3380 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3381 PX86PDPAE pPDDst;
3382 PX86PDEPAE pPdeDst;
3383
3384# if PGM_GST_TYPE == PGM_TYPE_PROT
3385 /* AMD-V nested paging */
3386 X86PML4E Pml4eSrc;
3387 X86PDPE PdpeSrc;
3388 PX86PML4E pPml4eSrc = &Pml4eSrc;
3389
3390 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3391 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3392 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3393# endif
3394
3395 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3396 if (rc != VINF_SUCCESS)
3397 {
3398 pgmUnlock(pVM);
3399 AssertRC(rc);
3400 return rc;
3401 }
3402 Assert(pPDDst);
3403 pPdeDst = &pPDDst->a[iPDDst];
3404# endif
3405
3406# if defined(IN_RC)
3407 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3408 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3409# endif
3410
3411 if (!pPdeDst->n.u1Present)
3412 {
3413 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3414 if (rc != VINF_SUCCESS)
3415 {
3416# if defined(IN_RC)
3417 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3418 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3419# endif
3420 pgmUnlock(pVM);
3421 AssertRC(rc);
3422 return rc;
3423 }
3424 }
3425
3426# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3427 /* Check for dirty bit fault */
3428 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3429 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3430 Log(("PGMVerifyAccess: success (dirty)\n"));
3431 else
3432 {
3433 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3434# else
3435 {
3436 GSTPDE PdeSrc;
3437 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3438 PdeSrc.n.u1Present = 1;
3439 PdeSrc.n.u1Write = 1;
3440 PdeSrc.n.u1Accessed = 1;
3441 PdeSrc.n.u1User = 1;
3442
3443# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3444 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3445 if (uErr & X86_TRAP_PF_US)
3446 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3447 else /* supervisor */
3448 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3449
3450 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3451 if (RT_SUCCESS(rc))
3452 {
3453 /* Page was successfully synced */
3454 Log2(("PGMVerifyAccess: success (sync)\n"));
3455 rc = VINF_SUCCESS;
3456 }
3457 else
3458 {
3459 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3460 rc = VINF_EM_RAW_GUEST_TRAP;
3461 }
3462 }
3463# if defined(IN_RC)
3464 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3465 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3466# endif
3467 pgmUnlock(pVM);
3468 return rc;
3469
3470#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3471
3472 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3473 return VERR_INTERNAL_ERROR;
3474#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3475}
3476
3477
3478/**
3479 * Syncs the paging hierarchy starting at CR3.
3480 *
3481 * @returns VBox status code, no specials.
3482 * @param pVCpu The VMCPU handle.
3483 * @param cr0 Guest context CR0 register
3484 * @param cr3 Guest context CR3 register
3485 * @param cr4 Guest context CR4 register
3486 * @param fGlobal Including global page directories or not
3487 */
3488PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3489{
3490 PVM pVM = pVCpu->CTX_SUFF(pVM);
3491
3492 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3493
3494#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3495
3496 pgmLock(pVM);
3497
3498# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3499 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3500 if (pPool->cDirtyPages)
3501 pgmPoolResetDirtyPages(pVM);
3502# endif
3503
3504 /*
3505 * Update page access handlers.
3506 * The virtual are always flushed, while the physical are only on demand.
3507 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3508 * have to look into that later because it will have a bad influence on the performance.
3509 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3510 * bird: Yes, but that won't work for aliases.
3511 */
3512 /** @todo this MUST go away. See #1557. */
3513 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3514 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3515 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3516 pgmUnlock(pVM);
3517#endif /* !NESTED && !EPT */
3518
3519#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3520 /*
3521 * Nested / EPT - almost no work.
3522 */
3523 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3524 return VINF_SUCCESS;
3525
3526#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3527 /*
3528 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3529 * out the shadow parts when the guest modifies its tables.
3530 */
3531 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3532 return VINF_SUCCESS;
3533
3534#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3535
3536# ifndef PGM_WITHOUT_MAPPINGS
3537 /*
3538 * Check for and resolve conflicts with our guest mappings if they
3539 * are enabled and not fixed.
3540 */
3541 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3542 {
3543 int rc = pgmMapResolveConflicts(pVM);
3544 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3545 if (rc == VINF_PGM_SYNC_CR3)
3546 {
3547 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3548 return VINF_PGM_SYNC_CR3;
3549 }
3550 }
3551# else
3552 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3553# endif
3554 return VINF_SUCCESS;
3555#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3556}
3557
3558
3559
3560
3561#ifdef VBOX_STRICT
3562#ifdef IN_RC
3563# undef AssertMsgFailed
3564# define AssertMsgFailed Log
3565#endif
3566#ifdef IN_RING3
3567# include <VBox/dbgf.h>
3568
/**
 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
 *
 * @returns VBox status code (VINF_SUCCESS).
 * @param   pVM         The VM handle.
 * @param   cr3         The root of the hierarchy.
 * @param   cr4         The cr4, only PAE and PSE are currently used.
 * @param   fLongMode   Set if long mode, false if not long mode.
 * @param   cMaxDepth   Number of levels to dump.
 * @param   pHlp        Pointer to the output functions.
 */
3579RT_C_DECLS_BEGIN
3580VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3581RT_C_DECLS_END
3582
3583#endif
3584
3585/**
3586 * Checks that the shadow page table is in sync with the guest one.
3587 *
3588 * @returns The number of errors.
3589 * @param pVM The virtual machine.
3590 * @param pVCpu The VMCPU handle.
3591 * @param cr3 Guest context CR3 register
3592 * @param cr4 Guest context CR4 register
3593 * @param GCPtr Where to start. Defaults to 0.
3594 * @param cb How much to check. Defaults to everything.
3595 */
3596PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3597{
3598#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3599 return 0;
3600#else
3601 unsigned cErrors = 0;
3602 PVM pVM = pVCpu->CTX_SUFF(pVM);
3603 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3604
3605#if PGM_GST_TYPE == PGM_TYPE_PAE
3606 /** @todo currently broken; crashes below somewhere */
3607 AssertFailed();
3608#endif
3609
3610#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3611 || PGM_GST_TYPE == PGM_TYPE_PAE \
3612 || PGM_GST_TYPE == PGM_TYPE_AMD64
3613
3614# if PGM_GST_TYPE == PGM_TYPE_32BIT
3615 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3616# else
3617 bool fBigPagesSupported = true;
3618# endif
3619 PPGMCPU pPGM = &pVCpu->pgm.s;
3620 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3621 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3622# ifndef IN_RING0
3623 RTHCPHYS HCPhys; /* general usage. */
3624# endif
3625 int rc;
3626
3627 /*
3628 * Check that the Guest CR3 and all its mappings are correct.
3629 */
3630 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3631 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3632 false);
3633# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3634# if PGM_GST_TYPE == PGM_TYPE_32BIT
3635 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3636# else
3637 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3638# endif
3639 AssertRCReturn(rc, 1);
3640 HCPhys = NIL_RTHCPHYS;
3641 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3642 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3643# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3644 pgmGstGet32bitPDPtr(pPGM);
3645 RTGCPHYS GCPhys;
3646 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3647 AssertRCReturn(rc, 1);
3648 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3649# endif
3650# endif /* !IN_RING0 */
3651
3652 /*
3653 * Get and check the Shadow CR3.
3654 */
3655# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3656 unsigned cPDEs = X86_PG_ENTRIES;
3657 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3658# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3659# if PGM_GST_TYPE == PGM_TYPE_32BIT
3660 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3661# else
3662 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3663# endif
3664 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3665# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3666 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3667 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3668# endif
3669 if (cb != ~(RTGCPTR)0)
3670 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3671
3672/** @todo call the other two PGMAssert*() functions. */
3673
3674# if PGM_GST_TYPE == PGM_TYPE_AMD64
3675 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3676
3677 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3678 {
3679 PPGMPOOLPAGE pShwPdpt = NULL;
3680 PX86PML4E pPml4eSrc;
3681 PX86PML4E pPml4eDst;
3682 RTGCPHYS GCPhysPdptSrc;
3683
3684 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3685 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3686
3687 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3688 if (!pPml4eDst->n.u1Present)
3689 {
3690 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3691 continue;
3692 }
3693
3694 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3695 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3696
3697 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3698 {
3699 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3700 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3701 cErrors++;
3702 continue;
3703 }
3704
3705 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3706 {
3707 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3708 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3709 cErrors++;
3710 continue;
3711 }
3712
3713 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3714 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3715 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3716 {
3717 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3718 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3719 cErrors++;
3720 continue;
3721 }
3722# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3723 {
3724# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3725
3726# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3727 /*
3728 * Check the PDPTEs too.
3729 */
3730 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3731
3732 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3733 {
3734 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3735 PPGMPOOLPAGE pShwPde = NULL;
3736 PX86PDPE pPdpeDst;
3737 RTGCPHYS GCPhysPdeSrc;
3738# if PGM_GST_TYPE == PGM_TYPE_PAE
3739 X86PDPE PdpeSrc;
3740 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3741 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3742# else
3743 PX86PML4E pPml4eSrcIgn;
3744 X86PDPE PdpeSrc;
3745 PX86PDPT pPdptDst;
3746 PX86PDPAE pPDDst;
3747 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3748
3749 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3750 if (rc != VINF_SUCCESS)
3751 {
3752 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3753 GCPtr += 512 * _2M;
3754 continue; /* next PDPTE */
3755 }
3756 Assert(pPDDst);
3757# endif
3758 Assert(iPDSrc == 0);
3759
3760 pPdpeDst = &pPdptDst->a[iPdpt];
3761
3762 if (!pPdpeDst->n.u1Present)
3763 {
3764 GCPtr += 512 * _2M;
3765 continue; /* next PDPTE */
3766 }
3767
3768 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3769 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3770
3771 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3772 {
3773 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3774 GCPtr += 512 * _2M;
3775 cErrors++;
3776 continue;
3777 }
3778
3779 if (GCPhysPdeSrc != pShwPde->GCPhys)
3780 {
3781# if PGM_GST_TYPE == PGM_TYPE_AMD64
3782 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3783# else
3784 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3785# endif
3786 GCPtr += 512 * _2M;
3787 cErrors++;
3788 continue;
3789 }
3790
3791# if PGM_GST_TYPE == PGM_TYPE_AMD64
3792 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3793 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3794 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3795 {
3796 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3797 GCPtr += 512 * _2M;
3798 cErrors++;
3799 continue;
3800 }
3801# endif
3802
3803# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3804 {
3805# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3806# if PGM_GST_TYPE == PGM_TYPE_32BIT
3807 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3808# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3809 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3810# endif
3811# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3812 /*
3813 * Iterate the shadow page directory.
3814 */
3815 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3816 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3817
3818 for (;
3819 iPDDst < cPDEs;
3820 iPDDst++, GCPtr += cIncrement)
3821 {
3822# if PGM_SHW_TYPE == PGM_TYPE_PAE
3823 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3824# else
3825 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3826# endif
3827 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3828 {
3829 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3830 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3831 {
3832 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3833 cErrors++;
3834 continue;
3835 }
3836 }
3837 else if ( (PdeDst.u & X86_PDE_P)
3838 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3839 )
3840 {
3841 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3842 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3843 if (!pPoolPage)
3844 {
3845 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3846 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3847 cErrors++;
3848 continue;
3849 }
3850 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3851
3852 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3853 {
3854 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3855 GCPtr, (uint64_t)PdeDst.u));
3856 cErrors++;
3857 }
3858
3859 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3860 {
3861 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3862 GCPtr, (uint64_t)PdeDst.u));
3863 cErrors++;
3864 }
3865
3866 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3867 if (!PdeSrc.n.u1Present)
3868 {
3869 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3870 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3871 cErrors++;
3872 continue;
3873 }
3874
3875 if ( !PdeSrc.b.u1Size
3876 || !fBigPagesSupported)
3877 {
3878 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3879# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3880 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3881# endif
3882 }
3883 else
3884 {
3885# if PGM_GST_TYPE == PGM_TYPE_32BIT
3886 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3887 {
3888 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3889 GCPtr, (uint64_t)PdeSrc.u));
3890 cErrors++;
3891 continue;
3892 }
3893# endif
3894 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3895# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3896 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3897# endif
3898 }
3899
3900 if ( pPoolPage->enmKind
3901 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3902 {
3903 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3904 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3905 cErrors++;
3906 }
3907
3908 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3909 if (!pPhysPage)
3910 {
3911 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3912 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3913 cErrors++;
3914 continue;
3915 }
3916
3917 if (GCPhysGst != pPoolPage->GCPhys)
3918 {
3919 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3920 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3921 cErrors++;
3922 continue;
3923 }
3924
3925 if ( !PdeSrc.b.u1Size
3926 || !fBigPagesSupported)
3927 {
3928 /*
3929 * Page Table.
3930 */
3931 const GSTPT *pPTSrc;
3932 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3933 if (RT_FAILURE(rc))
3934 {
3935 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3936 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3937 cErrors++;
3938 continue;
3939 }
3940 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3941 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3942 {
3943 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3944 // (This problem will go away when/if we shadow multiple CR3s.)
3945 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3946 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3947 cErrors++;
3948 continue;
3949 }
3950 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3951 {
3952 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3953 GCPtr, (uint64_t)PdeDst.u));
3954 cErrors++;
3955 continue;
3956 }
3957
3958 /* iterate the page table. */
3959# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3960 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3961 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3962# else
3963 const unsigned offPTSrc = 0;
3964# endif
3965 for (unsigned iPT = 0, off = 0;
3966 iPT < RT_ELEMENTS(pPTDst->a);
3967 iPT++, off += PAGE_SIZE)
3968 {
3969 const SHWPTE PteDst = pPTDst->a[iPT];
3970
3971 /* skip not-present entries. */
3972 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3973 continue;
3974 Assert(PteDst.n.u1Present);
3975
3976 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3977 if (!PteSrc.n.u1Present)
3978 {
3979# ifdef IN_RING3
3980 PGMAssertHandlerAndFlagsInSync(pVM);
3981 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3982# endif
3983 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3984 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3985 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3986 cErrors++;
3987 continue;
3988 }
3989
3990 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3991# if 1 /** @todo sync accessed bit properly... */
3992 fIgnoreFlags |= X86_PTE_A;
3993# endif
3994
3995 /* match the physical addresses */
3996 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3997 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3998
3999# ifdef IN_RING3
4000 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4001 if (RT_FAILURE(rc))
4002 {
4003 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4004 {
4005 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4006 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4007 cErrors++;
4008 continue;
4009 }
4010 }
4011 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4012 {
4013 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4014 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4015 cErrors++;
4016 continue;
4017 }
4018# endif
4019
4020 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4021 if (!pPhysPage)
4022 {
4023# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4024 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4025 {
4026 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4027 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4028 cErrors++;
4029 continue;
4030 }
4031# endif
4032 if (PteDst.n.u1Write)
4033 {
4034 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4035 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4036 cErrors++;
4037 }
4038 fIgnoreFlags |= X86_PTE_RW;
4039 }
4040 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4041 {
4042 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4043 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4044 cErrors++;
4045 continue;
4046 }
4047
4048 /* flags */
4049 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4050 {
4051 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4052 {
4053 if (PteDst.n.u1Write)
4054 {
4055 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4056 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4057 cErrors++;
4058 continue;
4059 }
4060 fIgnoreFlags |= X86_PTE_RW;
4061 }
4062 else
4063 {
4064 if (PteDst.n.u1Present)
4065 {
4066 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4067 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4068 cErrors++;
4069 continue;
4070 }
4071 fIgnoreFlags |= X86_PTE_P;
4072 }
4073 }
4074 else
4075 {
4076 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4077 {
4078 if (PteDst.n.u1Write)
4079 {
4080 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4081 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4082 cErrors++;
4083 continue;
4084 }
4085 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4086 {
4087 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4088 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4089 cErrors++;
4090 continue;
4091 }
4092 if (PteDst.n.u1Dirty)
4093 {
4094 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4095 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4096 cErrors++;
4097 }
4098# if 0 /** @todo sync access bit properly... */
4099 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4100 {
4101 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4102 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4103 cErrors++;
4104 }
4105 fIgnoreFlags |= X86_PTE_RW;
4106# else
4107 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4108# endif
4109 }
4110 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4111 {
4112 /* access bit emulation (not implemented). */
4113 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4114 {
4115 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4116 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4117 cErrors++;
4118 continue;
4119 }
4120 if (!PteDst.n.u1Accessed)
4121 {
4122 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4123 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4124 cErrors++;
4125 }
4126 fIgnoreFlags |= X86_PTE_P;
4127 }
4128# ifdef DEBUG_sandervl
4129 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4130# endif
4131 }
4132
4133 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4134 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4135 )
4136 {
4137 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4138 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4139 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4140 cErrors++;
4141 continue;
4142 }
4143 } /* foreach PTE */
4144 }
4145 else
4146 {
4147 /*
4148 * Big Page.
4149 */
4150 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4151 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4152 {
4153 if (PdeDst.n.u1Write)
4154 {
4155 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4156 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4157 cErrors++;
4158 continue;
4159 }
4160 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4161 {
4162 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4163 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4164 cErrors++;
4165 continue;
4166 }
4167# if 0 /** @todo sync access bit properly... */
4168 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4169 {
4170 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4171 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4172 cErrors++;
4173 }
4174 fIgnoreFlags |= X86_PTE_RW;
4175# else
4176 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4177# endif
4178 }
4179 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4180 {
4181 /* access bit emulation (not implemented). */
4182 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4183 {
4184 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4185 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4186 cErrors++;
4187 continue;
4188 }
4189 if (!PdeDst.n.u1Accessed)
4190 {
4191 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4192 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4193 cErrors++;
4194 }
4195 fIgnoreFlags |= X86_PTE_P;
4196 }
4197
4198 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4199 {
4200 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4201 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4202 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4203 cErrors++;
4204 }
4205
4206 /* iterate the page table. */
4207 for (unsigned iPT = 0, off = 0;
4208 iPT < RT_ELEMENTS(pPTDst->a);
4209 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4210 {
4211 const SHWPTE PteDst = pPTDst->a[iPT];
4212
4213 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4214 {
4215 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4216 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4217 cErrors++;
4218 }
4219
4220 /* skip not-present entries. */
4221 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4222 continue;
4223
4224 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4225
4226 /* match the physical addresses */
4227 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4228
4229# ifdef IN_RING3
4230 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4231 if (RT_FAILURE(rc))
4232 {
4233 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4234 {
4235 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4236 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4237 cErrors++;
4238 }
4239 }
4240 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4241 {
4242 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4243 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4244 cErrors++;
4245 continue;
4246 }
4247# endif
4248 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4249 if (!pPhysPage)
4250 {
4251# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4252 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4253 {
4254 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4255 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4256 cErrors++;
4257 continue;
4258 }
4259# endif
4260 if (PteDst.n.u1Write)
4261 {
4262 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4263 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4264 cErrors++;
4265 }
4266 fIgnoreFlags |= X86_PTE_RW;
4267 }
4268 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4269 {
4270 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4271 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4272 cErrors++;
4273 continue;
4274 }
4275
4276 /* flags */
4277 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4278 {
4279 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4280 {
4281 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4282 {
4283 if (PteDst.n.u1Write)
4284 {
4285 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4286 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4287 cErrors++;
4288 continue;
4289 }
4290 fIgnoreFlags |= X86_PTE_RW;
4291 }
4292 }
4293 else
4294 {
4295 if (PteDst.n.u1Present)
4296 {
4297 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4298 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4299 cErrors++;
4300 continue;
4301 }
4302 fIgnoreFlags |= X86_PTE_P;
4303 }
4304 }
4305
4306 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4307 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4308 )
4309 {
4310 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4311 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4312 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4313 cErrors++;
4314 continue;
4315 }
4316 } /* for each PTE */
4317 }
4318 }
4319 /* not present */
4320
4321 } /* for each PDE */
4322
4323 } /* for each PDPTE */
4324
4325 } /* for each PML4E */
4326
4327# ifdef DEBUG
4328 if (cErrors)
4329 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4330# endif
4331
4332#endif /* GST == 32BIT, PAE or AMD64 */
4333 return cErrors;
4334
4335#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4336}
4337#endif /* VBOX_STRICT */
4338
4339
4340/**
4341 * Sets up the CR3 for shadow paging
4342 *
4343 * @returns Strict VBox status code.
4344 * @retval VINF_SUCCESS.
4345 *
4346 * @param pVCpu The VMCPU handle.
4347 * @param GCPhysCR3 The physical address in the CR3 register.
4348 */
4349PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4350{
4351 PVM pVM = pVCpu->CTX_SUFF(pVM);
4352
4353 /* Update guest paging info. */
4354#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4355 || PGM_GST_TYPE == PGM_TYPE_PAE \
4356 || PGM_GST_TYPE == PGM_TYPE_AMD64
4357
4358 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4359
4360 /*
4361 * Map the page CR3 points at.
4362 */
4363 RTHCPTR HCPtrGuestCR3;
4364 RTHCPHYS HCPhysGuestCR3;
4365 pgmLock(pVM);
4366 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4367 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4368 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4369 /** @todo this needs some reworking wrt. locking. */
4370# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4371 HCPtrGuestCR3 = NIL_RTHCPTR;
4372 int rc = VINF_SUCCESS;
4373# else
4374 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4375# endif
4376 pgmUnlock(pVM);
4377 if (RT_SUCCESS(rc))
4378 {
4379 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4380 if (RT_SUCCESS(rc))
4381 {
4382# ifdef IN_RC
4383 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4384# endif
4385# if PGM_GST_TYPE == PGM_TYPE_32BIT
4386 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4387# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4388 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4389# endif
4390 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4391
4392# elif PGM_GST_TYPE == PGM_TYPE_PAE
4393 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4394 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4395# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4396 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4397# endif
4398 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4399 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4400
4401 /*
4402 * Map the 4 PDs too.
4403 */
4404 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4405 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4406 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4407 {
4408 if (pGuestPDPT->a[i].n.u1Present)
4409 {
4410 RTHCPTR HCPtr;
4411 RTHCPHYS HCPhys;
4412 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4413 pgmLock(pVM);
4414 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4415 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4416 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4417# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4418 HCPtr = NIL_RTHCPTR;
4419 int rc2 = VINF_SUCCESS;
4420# else
4421 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4422# endif
4423 pgmUnlock(pVM);
4424 if (RT_SUCCESS(rc2))
4425 {
4426 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4427 AssertRCReturn(rc, rc);
4428
4429 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4430# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4431 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4432# endif
4433 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4434 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4435# ifdef IN_RC
4436 PGM_INVL_PG(pVCpu, GCPtr);
4437# endif
4438 continue;
4439 }
4440 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4441 }
4442
4443 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4444# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4445 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4446# endif
4447 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4448 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4449# ifdef IN_RC
4450 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4451# endif
4452 }
4453
4454# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4455 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4456# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4457 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4458# endif
4459# endif
4460 }
4461 else
4462 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4463 }
4464 else
4465 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4466
4467#else /* prot/real stub */
4468 int rc = VINF_SUCCESS;
4469#endif
4470
4471 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4472# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4473 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4474 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4475 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4476 && PGM_GST_TYPE != PGM_TYPE_PROT))
4477
4478 Assert(!HWACCMIsNestedPagingActive(pVM));
4479
4480 /*
4481 * Update the shadow root page as well since that's not fixed.
4482 */
4483 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4484 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4485 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4486 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4487 PPGMPOOLPAGE pNewShwPageCR3;
4488
4489 pgmLock(pVM);
4490
4491# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4492 if (pPool->cDirtyPages)
4493 pgmPoolResetDirtyPages(pVM);
4494# endif
4495
4496 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4497 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4498 AssertFatalRC(rc);
4499 rc = VINF_SUCCESS;
4500
4501# ifdef IN_RC
4502 /*
4503 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4504 * state will be inconsistent! Flush important things now while
4505 * we still can and then make sure there are no ring-3 calls.
4506 */
4507 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4508 VMMRZCallRing3Disable(pVCpu);
4509# endif
4510
4511 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4512 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4513 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4514# ifdef IN_RING0
4515 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4516 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4517# elif defined(IN_RC)
4518 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4519 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4520# else
4521 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4522 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4523# endif
4524
4525# ifndef PGM_WITHOUT_MAPPINGS
4526 /*
4527 * Apply all hypervisor mappings to the new CR3.
4528 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4529 * make sure we check for conflicts in the new CR3 root.
4530 */
4531# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4532 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4533# endif
4534 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4535 AssertRCReturn(rc, rc);
4536# endif
4537
4538 /* Set the current hypervisor CR3. */
4539 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4540 SELMShadowCR3Changed(pVM, pVCpu);
4541
4542# ifdef IN_RC
4543 /* NOTE: The state is consistent again. */
4544 VMMRZCallRing3Enable(pVCpu);
4545# endif
4546
4547 /* Clean up the old CR3 root. */
4548 if ( pOldShwPageCR3
4549 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4550 {
4551 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4552# ifndef PGM_WITHOUT_MAPPINGS
4553 /* Remove the hypervisor mappings from the shadow page table. */
4554 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4555# endif
4556 /* Mark the page as unlocked; allow flushing again. */
4557 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4558
4559 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4560 }
4561 pgmUnlock(pVM);
4562# endif
4563
4564 return rc;
4565}
4566
4567/**
4568 * Unmaps the shadow CR3.
4569 *
4570 * @returns VBox status, no specials.
4571 * @param pVCpu The VMCPU handle.
4572 */
4573PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4574{
4575 LogFlow(("UnmapCR3\n"));
4576
4577 int rc = VINF_SUCCESS;
4578 PVM pVM = pVCpu->CTX_SUFF(pVM);
4579
4580 /*
4581 * Update guest paging info.
4582 */
4583#if PGM_GST_TYPE == PGM_TYPE_32BIT
4584 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4585# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4586 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4587# endif
4588 pVCpu->pgm.s.pGst32BitPdRC = 0;
4589
4590#elif PGM_GST_TYPE == PGM_TYPE_PAE
4591 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4592# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4593 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4594# endif
4595 pVCpu->pgm.s.pGstPaePdptRC = 0;
4596 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4597 {
4598 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4599# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4600 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4601# endif
4602 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4603 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4604 }
4605
4606#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4607 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4608# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4609 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4610# endif
4611
4612#else /* prot/real mode stub */
4613 /* nothing to do */
4614#endif
4615
4616#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4617 /*
4618 * Update shadow paging info.
4619 */
4620# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4621 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4622 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4623
4624# if PGM_GST_TYPE != PGM_TYPE_REAL
4625 Assert(!HWACCMIsNestedPagingActive(pVM));
4626# endif
4627
4628 pgmLock(pVM);
4629
4630# ifndef PGM_WITHOUT_MAPPINGS
4631 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4632 /* Remove the hypervisor mappings from the shadow page table. */
4633 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4634# endif
4635
4636 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4637 {
4638 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4639
4640 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4641
4642# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4643 if (pPool->cDirtyPages)
4644 pgmPoolResetDirtyPages(pVM);
4645# endif
4646
4647 /* Mark the page as unlocked; allow flushing again. */
4648 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4649
4650 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4651 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4652 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4653 pVCpu->pgm.s.pShwPageCR3RC = 0;
4654 pVCpu->pgm.s.iShwUser = 0;
4655 pVCpu->pgm.s.iShwUserTable = 0;
4656 }
4657 pgmUnlock(pVM);
4658# endif
4659#endif /* !IN_RC*/
4660
4661 return rc;
4662}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette