VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 27814

Last change on this file since 27814 was 27814, checked in by vboxsync, 15 years ago

Backed out part of r59436

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 198.8 KB
Line 
1/* $Id: PGMAllBth.h 27814 2010-03-30 08:42:23Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
34PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
35PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
37#ifdef VBOX_STRICT
38PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
39#endif
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
42PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
43RT_C_DECLS_END
44
45
46/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
47#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
48# error "Invalid combination; PAE guest implies PAE shadow"
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
53# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
57 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
58# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
59#endif
60
61#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
62 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
63# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
64#endif
65
66
67#ifndef IN_RING3
68/**
69 * #PF Handler for raw-mode guest execution.
70 *
71 * @returns VBox status code (appropriate for trap handling and GC return).
72 *
73 * @param pVCpu VMCPU Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 * @param pfLockTaken PGM lock taken here or not (out)
78 */
79PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
80{
81 PVM pVM = pVCpu->CTX_SUFF(pVM);
82
83 *pfLockTaken = false;
84
85# if defined(IN_RC) && defined(VBOX_STRICT)
86 PGMDynCheckLocks(pVM);
87# endif
88
89# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
90 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
91 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
92
93# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
94 /*
95 * Hide the instruction fetch trap indicator for now.
96 */
97 /** @todo NXE will change this and we must fix NXE in the switcher too! */
98 if (uErr & X86_TRAP_PF_ID)
99 {
100 uErr &= ~X86_TRAP_PF_ID;
101 TRPMSetErrorCode(pVCpu, uErr);
102 }
103# endif
104
105 /*
106 * Get PDs.
107 */
108 int rc;
109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
110# if PGM_GST_TYPE == PGM_TYPE_32BIT
111 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
113
114# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
115
116# if PGM_GST_TYPE == PGM_TYPE_PAE
117 unsigned iPDSrc = 0; /* initialized to shut up gcc */
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120
121# elif PGM_GST_TYPE == PGM_TYPE_AMD64
122 unsigned iPDSrc = 0; /* initialized to shut up gcc */
123 PX86PML4E pPml4eSrc;
124 X86PDPE PdpeSrc;
125 PGSTPD pPDSrc;
126
127 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
128 Assert(pPml4eSrc);
129# endif
130
131 /* Quick check for a valid guest trap. (PAE & AMD64) */
132 if (!pPDSrc)
133 {
134# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
135 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
136# else
137 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# endif
139 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
140 TRPMSetErrorCode(pVCpu, uErr);
141 return VINF_EM_RAW_GUEST_TRAP;
142 }
143# endif
144
145# else /* !PGM_WITH_PAGING */
146 PGSTPD pPDSrc = NULL;
147 const unsigned iPDSrc = 0;
148# endif /* !PGM_WITH_PAGING */
149
150 /* First check for a genuine guest page fault. */
151# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
152 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
153 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
154 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
155 if (rc == VINF_EM_RAW_GUEST_TRAP)
156 {
157 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
158 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
159 return rc;
160 }
161# endif /* PGM_WITH_PAGING */
162
163 /* Take the big lock now. */
164 *pfLockTaken = true;
165 pgmLock(pVM);
166
167 /* Fetch the guest PDE */
168# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
169 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
170# else
171 GSTPDE PdeSrc;
172 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
173 PdeSrc.n.u1Present = 1;
174 PdeSrc.n.u1Write = 1;
175 PdeSrc.n.u1Accessed = 1;
176 PdeSrc.n.u1User = 1;
177# endif
178
179# if PGM_SHW_TYPE == PGM_TYPE_32BIT
180 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
181 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
182
183# elif PGM_SHW_TYPE == PGM_TYPE_PAE
184 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
185
186 PX86PDPAE pPDDst;
187# if PGM_GST_TYPE != PGM_TYPE_PAE
188 X86PDPE PdpeSrc;
189
190 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
191 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
192# endif
193 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
194 if (rc != VINF_SUCCESS)
195 {
196 AssertRC(rc);
197 return rc;
198 }
199 Assert(pPDDst);
200
201# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
202 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
203 PX86PDPAE pPDDst;
204# if PGM_GST_TYPE == PGM_TYPE_PROT
205 /* AMD-V nested paging */
206 X86PML4E Pml4eSrc;
207 X86PDPE PdpeSrc;
208 PX86PML4E pPml4eSrc = &Pml4eSrc;
209
210 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
211 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
212 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
213# endif
214
215 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
216 if (rc != VINF_SUCCESS)
217 {
218 AssertRC(rc);
219 return rc;
220 }
221 Assert(pPDDst);
222
223# elif PGM_SHW_TYPE == PGM_TYPE_EPT
224 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
225 PEPTPD pPDDst;
226
227 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
228 if (rc != VINF_SUCCESS)
229 {
230 AssertRC(rc);
231 return rc;
232 }
233 Assert(pPDDst);
234# endif
235
236# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
237 /* Dirty page handling. */
238 if (uErr & X86_TRAP_PF_RW) /* write fault? */
239 {
240 /*
241 * If we successfully correct the write protection fault due to dirty bit
242 * tracking, then return immediately.
243 */
244 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
245 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
246 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
247 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
248 {
249 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
250 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
251 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
252 return VINF_SUCCESS;
253 }
254 }
255
256# if 0 /* rarely useful; leave for debugging. */
257 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
258# endif
259# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
260
261 /*
262 * A common case is the not-present error caused by lazy page table syncing.
263 *
264 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
265 * so we can safely assume that the shadow PT is present when calling SyncPage later.
266 *
267 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
268 * of mapping conflict and defer to SyncCR3 in R3.
269 * (Again, we do NOT support access handlers for non-present guest pages.)
270 *
271 */
272 Assert(PdeSrc.n.u1Present);
273 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
274 && !pPDDst->a[iPDDst].n.u1Present
275 )
276 {
277 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
278 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
279 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
280 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
281 if (RT_SUCCESS(rc))
282 {
283 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
284 return rc;
285 }
286 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
287 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
288 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
289 return VINF_PGM_SYNC_CR3;
290 }
291
292# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
293 /*
294 * Check if this address is within any of our mappings.
295 *
296 * This is *very* fast and it's gonna save us a bit of effort below and prevent
297 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
298 * (BTW, it's impossible to have physical access handlers in a mapping.)
299 */
300 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
301 {
302 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
303 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
304 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
305 {
306 if (pvFault < pMapping->GCPtr)
307 break;
308 if (pvFault - pMapping->GCPtr < pMapping->cb)
309 {
310 /*
311 * The first thing we check is if we've got an undetected conflict.
312 */
313 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
314 {
315 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
316 while (iPT-- > 0)
317 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
318 {
319 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
320 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
321 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync,right? */
322 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
323 return VINF_PGM_SYNC_CR3;
324 }
325 }
326
327 /*
328 * Check if the fault address is in a virtual page access handler range.
329 */
330 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
331 if ( pCur
332 && pvFault - pCur->Core.Key < pCur->cb
333 && uErr & X86_TRAP_PF_RW)
334 {
335# ifdef IN_RC
336 STAM_PROFILE_START(&pCur->Stat, h);
337 pgmUnlock(pVM);
338 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
339 pgmLock(pVM);
340 STAM_PROFILE_STOP(&pCur->Stat, h);
341# else
342 AssertFailed();
343 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
344# endif
345 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
346 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
347 return rc;
348 }
349
350 /*
351 * Pretend we're not here and let the guest handle the trap.
352 */
353 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
354 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
355 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
356 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
357 return VINF_EM_RAW_GUEST_TRAP;
358 }
359 }
360 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
361 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
362# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
363
364 /*
365 * Check if this fault address is flagged for special treatment,
366 * which means we'll have to figure out the physical address and
367 * check flags associated with it.
368 *
369 * ASSUME that we can limit any special access handling to pages
370 * in page tables which the guest believes to be present.
371 */
372 Assert(PdeSrc.n.u1Present);
373 {
374 RTGCPHYS GCPhys = NIL_RTGCPHYS;
375
376# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
377 if ( PdeSrc.b.u1Size
378# if PGM_GST_TYPE == PGM_TYPE_32BIT
379 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
380# endif
381 )
382 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
383 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
384 else
385 {
386 PGSTPT pPTSrc;
387 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
388 if (RT_SUCCESS(rc))
389 {
390 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
391 if (pPTSrc->a[iPTESrc].n.u1Present)
392 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
393 }
394 }
395# else
396 /* No paging so the fault address is the physical address */
397 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
398# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
399
400 /*
401 * If we have a GC address we'll check if it has any flags set.
402 */
403 if (GCPhys != NIL_RTGCPHYS)
404 {
405 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
406
407 PPGMPAGE pPage;
408 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
409 if (RT_SUCCESS(rc)) /** just handle the failure immediate (it returns) and make things easier to read. */
410 {
411 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
412 {
413 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
414 {
415 /*
416 * Physical page access handler.
417 */
418 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
419 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
420 if (pCur)
421 {
422# ifdef PGM_SYNC_N_PAGES
423 /*
424 * If the region is write protected and we got a page not present fault, then sync
425 * the pages. If the fault was caused by a read, then restart the instruction.
426 * In case of write access continue to the GC write handler.
427 *
428 * ASSUMES that there is only one handler per page or that they have similar write properties.
429 */
430 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
431 && !(uErr & X86_TRAP_PF_P))
432 {
433 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
434 if ( RT_FAILURE(rc)
435 || !(uErr & X86_TRAP_PF_RW)
436 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
437 {
438 AssertRC(rc);
439 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
440 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
441 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
442 return rc;
443 }
444 }
445# endif
446
447 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
448 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
449 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
450
451# if defined(IN_RC) || defined(IN_RING0)
452 if (pCur->CTX_SUFF(pfnHandler))
453 {
454 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
455# ifdef IN_RING0
456 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
457# else
458 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
459# endif
460 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
461 void *pvUser = pCur->CTX_SUFF(pvUser);
462
463 STAM_PROFILE_START(&pCur->Stat, h);
464 if (fLeaveLock)
465 pgmUnlock(pVM); /* @todo: Not entirely safe. */
466
467 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
468 if (fLeaveLock)
469 pgmLock(pVM);
470# ifdef VBOX_WITH_STATISTICS
471 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
472 if (pCur)
473 STAM_PROFILE_STOP(&pCur->Stat, h);
474# else
475 pCur = NULL; /* might be invalid by now. */
476# endif
477
478 }
479 else
480# endif
481 rc = VINF_EM_RAW_EMULATE_INSTR;
482
483 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
484 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
485 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
486 return rc;
487 }
488 }
489# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
490 else
491 {
492# ifdef PGM_SYNC_N_PAGES
493 /*
494 * If the region is write protected and we got a page not present fault, then sync
495 * the pages. If the fault was caused by a read, then restart the instruction.
496 * In case of write access continue to the GC write handler.
497 */
498 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
499 && !(uErr & X86_TRAP_PF_P))
500 {
501 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
502 if ( RT_FAILURE(rc)
503 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
504 || !(uErr & X86_TRAP_PF_RW))
505 {
506 AssertRC(rc);
507 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
508 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
509 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
510 return rc;
511 }
512 }
513# endif
514 /*
515 * Ok, it's an virtual page access handler.
516 *
517 * Since it's faster to search by address, we'll do that first
518 * and then retry by GCPhys if that fails.
519 */
520 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
521 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
522 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
523 */
524 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
525 if (pCur)
526 {
527 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
528 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
529 || !(uErr & X86_TRAP_PF_P)
530 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
531 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
532
533 if ( pvFault - pCur->Core.Key < pCur->cb
534 && ( uErr & X86_TRAP_PF_RW
535 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
536 {
537# ifdef IN_RC
538 STAM_PROFILE_START(&pCur->Stat, h);
539 pgmUnlock(pVM);
540 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
541 pgmLock(pVM);
542 STAM_PROFILE_STOP(&pCur->Stat, h);
543# else
544 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
545# endif
546 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
547 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
548 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
549 return rc;
550 }
551 /* Unhandled part of a monitored page */
552 }
553 else
554 {
555 /* Check by physical address. */
556 unsigned iPage;
557 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
558 &pCur, &iPage);
559 Assert(RT_SUCCESS(rc) || !pCur);
560 if ( pCur
561 && ( uErr & X86_TRAP_PF_RW
562 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
563 {
564 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
565# ifdef IN_RC
566 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
567 Assert(off < pCur->cb);
568 STAM_PROFILE_START(&pCur->Stat, h);
569 pgmUnlock(pVM);
570 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
571 pgmLock(pVM);
572 STAM_PROFILE_STOP(&pCur->Stat, h);
573# else
574 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
575# endif
576 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
577 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
578 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
579 return rc;
580 }
581 }
582 }
583# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
584
585 /*
586 * There is a handled area of the page, but this fault doesn't belong to it.
587 * We must emulate the instruction.
588 *
589 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
590 * we first check if this was a page-not-present fault for a page with only
591 * write access handlers. Restart the instruction if it wasn't a write access.
592 */
593 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
594
595 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
596 && !(uErr & X86_TRAP_PF_P))
597 {
598 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
599 if ( RT_FAILURE(rc)
600 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
601 || !(uErr & X86_TRAP_PF_RW))
602 {
603 AssertRC(rc);
604 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
605 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
606 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
607 return rc;
608 }
609 }
610
611 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
612 * It's writing to an unhandled part of the LDT page several million times.
613 */
614 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
615 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
616 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
617 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
618 return rc;
619 } /* if any kind of handler */
620
621# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
622 if (uErr & X86_TRAP_PF_P)
623 {
624 /*
625 * The page isn't marked, but it might still be monitored by a virtual page access handler.
626 * (ASSUMES no temporary disabling of virtual handlers.)
627 */
628 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
629 * we should correct both the shadow page table and physical memory flags, and not only check for
630 * accesses within the handler region but for access to pages with virtual handlers. */
631 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
632 if (pCur)
633 {
634 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
635 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
636 || !(uErr & X86_TRAP_PF_P)
637 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
638 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
639
640 if ( pvFault - pCur->Core.Key < pCur->cb
641 && ( uErr & X86_TRAP_PF_RW
642 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
643 {
644# ifdef IN_RC
645 STAM_PROFILE_START(&pCur->Stat, h);
646 pgmUnlock(pVM);
647 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
648 pgmLock(pVM);
649 STAM_PROFILE_STOP(&pCur->Stat, h);
650# else
651 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
652# endif
653 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
654 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
655 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
656 return rc;
657 }
658 }
659 }
660# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
661 }
662 else
663 {
664 /*
665 * When the guest accesses invalid physical memory (e.g. probing
666 * of RAM or accessing a remapped MMIO range), then we'll fall
667 * back to the recompiler to emulate the instruction.
668 */
669 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
670 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
671 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
672 return VINF_EM_RAW_EMULATE_INSTR;
673 }
674
675 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
676
677# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
678 /*
679 * We are here only if page is present in Guest page tables and
680 * trap is not handled by our handlers.
681 *
682 * Check it for page out-of-sync situation.
683 */
684 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
685
686 if (!(uErr & X86_TRAP_PF_P))
687 {
688 /*
689 * Page is not present in our page tables.
690 * Try to sync it!
691 * BTW, fPageShw is invalid in this branch!
692 */
693 if (uErr & X86_TRAP_PF_US)
694 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
695 else /* supervisor */
696 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
697
698 if (PGM_PAGE_IS_BALLOONED(pPage))
699 {
700 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
701 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
702 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
703 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
704 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
705 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
706 return rc;
707 }
708# if defined(LOG_ENABLED) && !defined(IN_RING0)
709 RTGCPHYS GCPhys2;
710 uint64_t fPageGst2;
711 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
712 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
713 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
714# endif /* LOG_ENABLED */
715
716# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
717 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
718 {
719 uint64_t fPageGst;
720 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
721 if ( RT_SUCCESS(rc)
722 && !(fPageGst & X86_PTE_US))
723 {
724 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
725 if ( pvFault == (RTGCPTR)pRegFrame->eip
726 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
727# ifdef CSAM_DETECT_NEW_CODE_PAGES
728 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
729 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
730# endif /* CSAM_DETECT_NEW_CODE_PAGES */
731 )
732 {
733 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
734 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
735 if (rc != VINF_SUCCESS)
736 {
737 /*
738 * CSAM needs to perform a job in ring 3.
739 *
740 * Sync the page before going to the host context; otherwise we'll end up in a loop if
741 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
742 */
743 LogFlow(("CSAM ring 3 job\n"));
744 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
745 AssertRC(rc2);
746
747 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
748 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
749 return rc;
750 }
751 }
752# ifdef CSAM_DETECT_NEW_CODE_PAGES
753 else if ( uErr == X86_TRAP_PF_RW
754 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
755 && pRegFrame->ecx < 0x10000)
756 {
757 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
758 * to detect loading of new code pages.
759 */
760
761 /*
762 * Decode the instruction.
763 */
764 RTGCPTR PC;
765 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
766 if (rc == VINF_SUCCESS)
767 {
768 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
769 uint32_t cbOp;
770 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
771
772 /* For now we'll restrict this to rep movsw/d instructions */
773 if ( rc == VINF_SUCCESS
774 && pDis->pCurInstr->opcode == OP_MOVSWD
775 && (pDis->prefix & PREFIX_REP))
776 {
777 CSAMMarkPossibleCodePage(pVM, pvFault);
778 }
779 }
780 }
781# endif /* CSAM_DETECT_NEW_CODE_PAGES */
782
783 /*
784 * Mark this page as safe.
785 */
786 /** @todo not correct for pages that contain both code and data!! */
787 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
788 CSAMMarkPage(pVM, pvFault, true);
789 }
790 }
791# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
792 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
793 if (RT_SUCCESS(rc))
794 {
795 /* The page was successfully synced, return to the guest. */
796 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
797 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
798 return VINF_SUCCESS;
799 }
800 }
801 else /* uErr & X86_TRAP_PF_P: */
802 {
803 /*
804 * Write protected pages are make writable when the guest makes the first
805 * write to it. This happens for pages that are shared, write monitored
806 * and not yet allocated.
807 *
808 * Also, a side effect of not flushing global PDEs are out of sync pages due
809 * to physical monitored regions, that are no longer valid.
810 * Assume for now it only applies to the read/write flag.
811 */
812 if ( RT_SUCCESS(rc)
813 && (uErr & X86_TRAP_PF_RW))
814 {
815 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
816 {
817 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
818 Assert(!PGM_PAGE_IS_ZERO(pPage));
819 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
820
821 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
822 if (rc != VINF_SUCCESS)
823 {
824 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
825 return rc;
826 }
827 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
828 return VINF_EM_NO_MEMORY;
829 }
830
831# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
832 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
833 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
834 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
835 {
836 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
837 uint64_t fPageGst;
838 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
839 if ( RT_SUCCESS(rc)
840 && !(fPageGst & X86_PTE_RW))
841 {
842 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
843 if (RT_SUCCESS(rc))
844 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
845 else
846 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
847 return rc;
848 }
849 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
850 }
851# endif
852 /// @todo count the above case; else
853 if (uErr & X86_TRAP_PF_US)
854 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
855 else /* supervisor */
856 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
857
858 /*
859 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
860 * page is not present, which is not true in this case.
861 */
862 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
863 if (RT_SUCCESS(rc))
864 {
865 /*
866 * Page was successfully synced, return to guest.
867 * First invalidate the page as it might be in the TLB.
868 */
869# if PGM_SHW_TYPE == PGM_TYPE_EPT
870 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
871# else
872 PGM_INVL_PG(pVCpu, pvFault);
873# endif
874# ifdef VBOX_STRICT
875 RTGCPHYS GCPhys2;
876 uint64_t fPageGst;
877 if (!HWACCMIsNestedPagingActive(pVM))
878 {
879 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
880 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n"));
881 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
882 }
883 uint64_t fPageShw;
884 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
885 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
886# endif /* VBOX_STRICT */
887 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
888 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
889 return VINF_SUCCESS;
890 }
891 }
892
893# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
894# ifdef VBOX_STRICT
895 /*
896 * Check for VMM page flags vs. Guest page flags consistency.
897 * Currently only for debug purposes.
898 */
899 if (RT_SUCCESS(rc))
900 {
901 /* Get guest page flags. */
902 uint64_t fPageGst;
903 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
904 if (RT_SUCCESS(rc))
905 {
906 uint64_t fPageShw;
907 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
908
909 /*
910 * Compare page flags.
911 * Note: we have AVL, A, D bits desynched.
912 */
913 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
914 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
915 }
916 else
917 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
918 }
919 else
920 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
921# endif /* VBOX_STRICT */
922# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
923 }
924 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
925# endif /* PGM_OUT_OF_SYNC_IN_GC */
926 }
927 else /* GCPhys == NIL_RTGCPHYS */
928 {
929 /*
930 * Page not present in Guest OS or invalid page table address.
931 * This is potential virtual page access handler food.
932 *
933 * For the present we'll say that our access handlers don't
934 * work for this case - we've already discarded the page table
935 * not present case which is identical to this.
936 *
937 * When we perchance find we need this, we will probably have AVL
938 * trees (offset based) to operate on and we can measure their speed
939 * agains mapping a page table and probably rearrange this handling
940 * a bit. (Like, searching virtual ranges before checking the
941 * physical address.)
942 */
943 }
944 }
945
946# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
947 /*
948 * Conclusion, this is a guest trap.
949 */
950 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
951 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
952 return VINF_EM_RAW_GUEST_TRAP;
953# else
954 /* present, but not a monitored page; perhaps the guest is probing physical memory */
955 return VINF_EM_RAW_EMULATE_INSTR;
956# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
957
958
959# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
960
961 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
962 return VERR_INTERNAL_ERROR;
963# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
964}
965#endif /* !IN_RING3 */
966
967
968/**
969 * Emulation of the invlpg instruction.
970 *
971 *
972 * @returns VBox status code.
973 *
974 * @param pVCpu The VMCPU handle.
975 * @param GCPtrPage Page to invalidate.
976 *
977 * @remark ASSUMES that the guest is updating before invalidating. This order
978 * isn't required by the CPU, so this is speculative and could cause
979 * trouble.
980 * @remark No TLB shootdown is done on any other VCPU as we assume that
981 * invlpg emulation is the *only* reason for calling this function.
982 * (The guest has to shoot down TLB entries on other CPUs itself)
983 * Currently true, but keep in mind!
984 *
985 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
986 */
987PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
988{
989#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
990 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
991 && PGM_SHW_TYPE != PGM_TYPE_EPT
992 int rc;
993 PVM pVM = pVCpu->CTX_SUFF(pVM);
994 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
995
996 Assert(PGMIsLockOwner(pVM));
997
998 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
999
1000# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1001 if (pPool->cDirtyPages)
1002 pgmPoolResetDirtyPages(pVM);
1003# endif
1004
1005 /*
1006 * Get the shadow PD entry and skip out if this PD isn't present.
1007 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1008 */
1009# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1010 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1011 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1012
1013 /* Fetch the pgm pool shadow descriptor. */
1014 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1015 Assert(pShwPde);
1016
1017# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1018 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1019 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1020
1021 /* If the shadow PDPE isn't present, then skip the invalidate. */
1022 if (!pPdptDst->a[iPdpt].n.u1Present)
1023 {
1024 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1025 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1026 return VINF_SUCCESS;
1027 }
1028
1029 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1030 PPGMPOOLPAGE pShwPde = NULL;
1031 PX86PDPAE pPDDst;
1032
1033 /* Fetch the pgm pool shadow descriptor. */
1034 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1035 AssertRCSuccessReturn(rc, rc);
1036 Assert(pShwPde);
1037
1038 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1039 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1040
1041# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1042 /* PML4 */
1043 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1044 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1045 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1046 PX86PDPAE pPDDst;
1047 PX86PDPT pPdptDst;
1048 PX86PML4E pPml4eDst;
1049 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1050 if (rc != VINF_SUCCESS)
1051 {
1052 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1053 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1054 return VINF_SUCCESS;
1055 }
1056 Assert(pPDDst);
1057
1058 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1059 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1060
1061 if (!pPdpeDst->n.u1Present)
1062 {
1063 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1064 return VINF_SUCCESS;
1065 }
1066
1067 /* Fetch the pgm pool shadow descriptor. */
1068 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1069 Assert(pShwPde);
1070
1071# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1072
1073 const SHWPDE PdeDst = *pPdeDst;
1074 if (!PdeDst.n.u1Present)
1075 {
1076 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1077 return VINF_SUCCESS;
1078 }
1079
1080# if defined(IN_RC)
1081 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1082 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1083# endif
1084
1085 /*
1086 * Get the guest PD entry and calc big page.
1087 */
1088# if PGM_GST_TYPE == PGM_TYPE_32BIT
1089 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1090 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1091 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1092# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1093 unsigned iPDSrc = 0;
1094# if PGM_GST_TYPE == PGM_TYPE_PAE
1095 X86PDPE PdpeSrc;
1096 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1097# else /* AMD64 */
1098 PX86PML4E pPml4eSrc;
1099 X86PDPE PdpeSrc;
1100 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1101# endif
1102 GSTPDE PdeSrc;
1103
1104 if (pPDSrc)
1105 PdeSrc = pPDSrc->a[iPDSrc];
1106 else
1107 PdeSrc.u = 0;
1108# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1109
1110# if PGM_GST_TYPE == PGM_TYPE_32BIT
1111 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1112# else
1113 const bool fIsBigPage = PdeSrc.b.u1Size;
1114# endif
1115
1116# ifdef IN_RING3
1117 /*
1118 * If a CR3 Sync is pending we may ignore the invalidate page operation
1119 * depending on the kind of sync and if it's a global page or not.
1120 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1121 */
1122# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1123 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1124 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1125 && fIsBigPage
1126 && PdeSrc.b.u1Global
1127 )
1128 )
1129# else
1130 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1131# endif
1132 {
1133 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1134 return VINF_SUCCESS;
1135 }
1136# endif /* IN_RING3 */
1137
1138 /*
1139 * Deal with the Guest PDE.
1140 */
1141 rc = VINF_SUCCESS;
1142 if (PdeSrc.n.u1Present)
1143 {
1144 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1145 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1146# ifndef PGM_WITHOUT_MAPPING
1147 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1148 {
1149 /*
1150 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1151 */
1152 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1153 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1154 pgmLock(pVM);
1155 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1156 pgmUnlock(pVM);
1157 }
1158 else
1159# endif /* !PGM_WITHOUT_MAPPING */
1160 if (!fIsBigPage)
1161 {
1162 /*
1163 * 4KB - page.
1164 */
1165 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1166 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1167
1168# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1169 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1170 if (pShwPage->cModifications)
1171 pShwPage->cModifications = 1;
1172# endif
1173
1174# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1175 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1176 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1177# endif
1178 if (pShwPage->GCPhys == GCPhys)
1179 {
1180# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1181 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1182 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1183 if (pPT->a[iPTEDst].n.u1Present)
1184 {
1185 /* This is very unlikely with caching/monitoring enabled. */
1186 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1187 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1188 }
1189# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1190 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1191 if (RT_SUCCESS(rc))
1192 rc = VINF_SUCCESS;
1193# endif
1194 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1195 PGM_INVL_PG(pVCpu, GCPtrPage);
1196 }
1197 else
1198 {
1199 /*
1200 * The page table address changed.
1201 */
1202 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1203 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1204 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1205 ASMAtomicWriteSize(pPdeDst, 0);
1206 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1207 PGM_INVL_VCPU_TLBS(pVCpu);
1208 }
1209 }
1210 else
1211 {
1212 /*
1213 * 2/4MB - page.
1214 */
1215 /* Before freeing the page, check if anything really changed. */
1216 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1217 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1218# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1219 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1220 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1221# endif
1222 if ( pShwPage->GCPhys == GCPhys
1223 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1224 {
1225 /* ASSUMES a the given bits are identical for 4M and normal PDEs */
1226 /** @todo PAT */
1227 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1228 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1229 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1230 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1231 {
1232 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1233 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1234# if defined(IN_RC)
1235 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1236 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1237# endif
1238 return VINF_SUCCESS;
1239 }
1240 }
1241
1242 /*
1243 * Ok, the page table is present and it's been changed in the guest.
1244 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1245 * We could do this for some flushes in GC too, but we need an algorithm for
1246 * deciding which 4MB pages containing code likely to be executed very soon.
1247 */
1248 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1249 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1250 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1251 ASMAtomicWriteSize(pPdeDst, 0);
1252 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1253 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1254 }
1255 }
1256 else
1257 {
1258 /*
1259 * Page directory is not present, mark shadow PDE not present.
1260 */
1261 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1262 {
1263 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1264 ASMAtomicWriteSize(pPdeDst, 0);
1265 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1266 PGM_INVL_PG(pVCpu, GCPtrPage);
1267 }
1268 else
1269 {
1270 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1271 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1272 }
1273 }
1274# if defined(IN_RC)
1275 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1276 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1277# endif
1278 return rc;
1279
1280#else /* guest real and protected mode */
1281 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1282 return VINF_SUCCESS;
1283#endif
1284}
1285
1286
1287/**
1288 * Update the tracking of shadowed pages.
1289 *
1290 * @param pVCpu The VMCPU handle.
1291 * @param pShwPage The shadow page.
1292 * @param HCPhys The physical page we is being dereferenced.
1293 */
1294DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1295{
1296 PVM pVM = pVCpu->CTX_SUFF(pVM);
1297
1298 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1299 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1300
1301 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1302 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1303 * 2. write protect all shadowed pages. I.e. implement caching.
1304 */
1305 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1306
1307 /*
1308 * Find the guest address.
1309 */
1310 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1311 pRam;
1312 pRam = pRam->CTX_SUFF(pNext))
1313 {
1314 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1315 while (iPage-- > 0)
1316 {
1317 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1318 {
1319 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1320
1321 Assert(pShwPage->cPresent);
1322 Assert(pPool->cPresent);
1323 pShwPage->cPresent--;
1324 pPool->cPresent--;
1325
1326 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1327 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1328 return;
1329 }
1330 }
1331 }
1332
1333 for (;;)
1334 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1335}
1336
1337
1338/**
1339 * Update the tracking of shadowed pages.
1340 *
1341 * @param pVCpu The VMCPU handle.
1342 * @param pShwPage The shadow page.
1343 * @param u16 The top 16-bit of the pPage->HCPhys.
1344 * @param pPage Pointer to the guest page. this will be modified.
1345 * @param iPTDst The index into the shadow table.
1346 */
1347DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1348{
1349 PVM pVM = pVCpu->CTX_SUFF(pVM);
1350 /*
1351 * Just deal with the simple first time here.
1352 */
1353 if (!u16)
1354 {
1355 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1356 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1357 }
1358 else
1359 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1360
1361 /* write back */
1362 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1363 PGM_PAGE_SET_TRACKING(pPage, u16);
1364
1365 /* update statistics. */
1366 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1367 pShwPage->cPresent++;
1368 if (pShwPage->iFirstPresent > iPTDst)
1369 pShwPage->iFirstPresent = iPTDst;
1370}
1371
1372
1373/**
1374 * Creates a 4K shadow page for a guest page.
1375 *
1376 * For 4M pages the caller must convert the PDE4M to a PTE, this includes adjusting the
1377 * physical address. The PdeSrc argument only the flags are used. No page structured
1378 * will be mapped in this function.
1379 *
1380 * @param pVCpu The VMCPU handle.
1381 * @param pPteDst Destination page table entry.
1382 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1383 * Can safely assume that only the flags are being used.
1384 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1385 * @param pShwPage Pointer to the shadow page.
1386 * @param iPTDst The index into the shadow table.
1387 *
1388 * @remark Not used for 2/4MB pages!
1389 */
1390DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1391{
1392 if (PteSrc.n.u1Present)
1393 {
1394 PVM pVM = pVCpu->CTX_SUFF(pVM);
1395
1396# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1397 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1398 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1399 if (pShwPage->fDirty)
1400 {
1401 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1402 PX86PTPAE pGstPT;
1403
1404 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1405 pGstPT->a[iPTDst].u = PteSrc.u;
1406 }
1407# endif
1408 /*
1409 * Find the ram range.
1410 */
1411 PPGMPAGE pPage;
1412 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1413 if (RT_SUCCESS(rc))
1414 {
1415 /* Ignore ballooned pages. Don't return errors or use a fatal assert here as part of a shadow sync range might included ballooned pages. */
1416 if (PGM_PAGE_IS_BALLOONED(pPage))
1417 return;
1418
1419#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1420 /* Try to make the page writable if necessary. */
1421 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1422 && ( PGM_PAGE_IS_ZERO(pPage)
1423 || ( PteSrc.n.u1Write
1424 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1425# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1426 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1427# endif
1428 )
1429 )
1430 )
1431 {
1432 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1433 AssertRC(rc);
1434 }
1435#endif
1436
1437 /** @todo investiage PWT, PCD and PAT. */
1438 /*
1439 * Make page table entry.
1440 */
1441 SHWPTE PteDst;
1442 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1443 {
1444 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1445 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1446 {
1447#if PGM_SHW_TYPE == PGM_TYPE_EPT
1448 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1449 PteDst.n.u1Present = 1;
1450 PteDst.n.u1Execute = 1;
1451 PteDst.n.u1IgnorePAT = 1;
1452 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1453 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1454#else
1455 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1456 | PGM_PAGE_GET_HCPHYS(pPage);
1457#endif
1458 }
1459 else
1460 {
1461 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1462 PteDst.u = 0;
1463 }
1464 /** @todo count these two kinds. */
1465 }
1466 else
1467 {
1468#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1469 /*
1470 * If the page or page directory entry is not marked accessed,
1471 * we mark the page not present.
1472 */
1473 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1474 {
1475 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1476 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1477 PteDst.u = 0;
1478 }
1479 else
1480 /*
1481 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1482 * when the page is modified.
1483 */
1484 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1485 {
1486 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1487 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1488 | PGM_PAGE_GET_HCPHYS(pPage)
1489 | PGM_PTFLAGS_TRACK_DIRTY;
1490 }
1491 else
1492#endif
1493 {
1494 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1495#if PGM_SHW_TYPE == PGM_TYPE_EPT
1496 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1497 PteDst.n.u1Present = 1;
1498 PteDst.n.u1Write = 1;
1499 PteDst.n.u1Execute = 1;
1500 PteDst.n.u1IgnorePAT = 1;
1501 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1502 /* PteDst.n.u1Size = 0 */
1503#else
1504 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1505 | PGM_PAGE_GET_HCPHYS(pPage);
1506#endif
1507 }
1508 }
1509
1510 /*
1511 * Make sure only allocated pages are mapped writable.
1512 */
1513 if ( PteDst.n.u1Write
1514 && PteDst.n.u1Present
1515 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1516 {
1517 /* Still applies to shared pages. */
1518 Assert(!PGM_PAGE_IS_ZERO(pPage));
1519 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1520 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1521 }
1522
1523 /*
1524 * Keep user track up to date.
1525 */
1526 if (PteDst.n.u1Present)
1527 {
1528 if (!pPteDst->n.u1Present)
1529 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1530 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1531 {
1532 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1533 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1534 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1535 }
1536 }
1537 else if (pPteDst->n.u1Present)
1538 {
1539 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1540 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1541 }
1542
1543 /*
1544 * Update statistics and commit the entry.
1545 */
1546#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1547 if (!PteSrc.n.u1Global)
1548 pShwPage->fSeenNonGlobal = true;
1549#endif
1550 ASMAtomicWriteSize(pPteDst, PteDst.u);
1551 }
1552 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1553 /** @todo count these. */
1554 }
1555 else
1556 {
1557 /*
1558 * Page not-present.
1559 */
1560 Log2(("SyncPageWorker: page not present in Pte\n"));
1561 /* Keep user track up to date. */
1562 if (pPteDst->n.u1Present)
1563 {
1564 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1565 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1566 }
1567 ASMAtomicWriteSize(pPteDst, 0);
1568 /** @todo count these. */
1569 }
1570}
1571
1572
1573/**
1574 * Syncs a guest OS page.
1575 *
1576 * There are no conflicts at this point, neither is there any need for
1577 * page table allocations.
1578 *
1579 * @returns VBox status code.
1580 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1581 * @param pVCpu The VMCPU handle.
1582 * @param PdeSrc Page directory entry of the guest.
1583 * @param GCPtrPage Guest context page address.
1584 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1585 * @param uErr Fault error (X86_TRAP_PF_*).
1586 */
1587PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1588{
1589 PVM pVM = pVCpu->CTX_SUFF(pVM);
1590 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1591 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1592
1593 Assert(PGMIsLockOwner(pVM));
1594
1595#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1596 || PGM_GST_TYPE == PGM_TYPE_PAE \
1597 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1598 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1599 && PGM_SHW_TYPE != PGM_TYPE_EPT
1600
1601 /*
1602 * Assert preconditions.
1603 */
1604 Assert(PdeSrc.n.u1Present);
1605 Assert(cPages);
1606# if 0 /* rarely useful; leave for debugging. */
1607 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1608# endif
1609
1610 /*
1611 * Get the shadow PDE, find the shadow page table in the pool.
1612 */
1613# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1614 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1615 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1616
1617 /* Fetch the pgm pool shadow descriptor. */
1618 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1619 Assert(pShwPde);
1620
1621# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1622 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1623 PPGMPOOLPAGE pShwPde = NULL;
1624 PX86PDPAE pPDDst;
1625
1626 /* Fetch the pgm pool shadow descriptor. */
1627 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1628 AssertRCSuccessReturn(rc2, rc2);
1629 Assert(pShwPde);
1630
1631 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1632 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1633
1634# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1635 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1636 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1637 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1638 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1639
1640 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1641 AssertRCSuccessReturn(rc2, rc2);
1642 Assert(pPDDst && pPdptDst);
1643 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1644# endif
1645 SHWPDE PdeDst = *pPdeDst;
1646
1647 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1648 if (!PdeDst.n.u1Present)
1649 {
1650 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1651 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1652 return VINF_SUCCESS; /* force the instruction to be executed again. */
1653 }
1654
1655 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1656 Assert(pShwPage);
1657
1658# if PGM_GST_TYPE == PGM_TYPE_AMD64
1659 /* Fetch the pgm pool shadow descriptor. */
1660 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1661 Assert(pShwPde);
1662# endif
1663
1664# if defined(IN_RC)
1665 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1666 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1667# endif
1668
1669 /*
1670 * Check that the page is present and that the shadow PDE isn't out of sync.
1671 */
1672# if PGM_GST_TYPE == PGM_TYPE_32BIT
1673 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1674# else
1675 const bool fBigPage = PdeSrc.b.u1Size;
1676# endif
1677 RTGCPHYS GCPhys;
1678 if (!fBigPage)
1679 {
1680 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1681# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1682 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1683 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1684# endif
1685 }
1686 else
1687 {
1688 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1689# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1690 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1691 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1692# endif
1693 }
1694 if ( pShwPage->GCPhys == GCPhys
1695 && PdeSrc.n.u1Present
1696 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1697 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1698# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1699 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1700# endif
1701 )
1702 {
1703 /*
1704 * Check that the PDE is marked accessed already.
1705 * Since we set the accessed bit *before* getting here on a #PF, this
1706 * check is only meant for dealing with non-#PF'ing paths.
1707 */
1708 if (PdeSrc.n.u1Accessed)
1709 {
1710 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1711 if (!fBigPage)
1712 {
1713 /*
1714 * 4KB Page - Map the guest page table.
1715 */
1716 PGSTPT pPTSrc;
1717 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1718 if (RT_SUCCESS(rc))
1719 {
1720# ifdef PGM_SYNC_N_PAGES
1721 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1722 if ( cPages > 1
1723 && !(uErr & X86_TRAP_PF_P)
1724 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1725 {
1726 /*
1727 * This code path is currently only taken when the caller is PGMTrap0eHandler
1728 * for non-present pages!
1729 *
1730 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1731 * deal with locality.
1732 */
1733 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1734# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1735 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1736 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1737# else
1738 const unsigned offPTSrc = 0;
1739# endif
1740 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1741 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1742 iPTDst = 0;
1743 else
1744 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1745 for (; iPTDst < iPTDstEnd; iPTDst++)
1746 {
1747 if (!pPTDst->a[iPTDst].n.u1Present)
1748 {
1749 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1750 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1751 NOREF(GCPtrCurPage);
1752#ifndef IN_RING0
1753 /*
1754 * Assuming kernel code will be marked as supervisor - and not as user level
1755 * and executed using a conforming code selector - And marked as readonly.
1756 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1757 */
1758 PPGMPAGE pPage;
1759 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1760 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1761 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1762 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1763 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1764 )
1765#endif /* else: CSAM not active */
1766 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1767 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1768 GCPtrCurPage, PteSrc.n.u1Present,
1769 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1770 PteSrc.n.u1User & PdeSrc.n.u1User,
1771 (uint64_t)PteSrc.u,
1772 (uint64_t)pPTDst->a[iPTDst].u,
1773 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1774 }
1775 }
1776 }
1777 else
1778# endif /* PGM_SYNC_N_PAGES */
1779 {
1780 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1781 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1782 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1783 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1784 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1785 GCPtrPage, PteSrc.n.u1Present,
1786 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1787 PteSrc.n.u1User & PdeSrc.n.u1User,
1788 (uint64_t)PteSrc.u,
1789 (uint64_t)pPTDst->a[iPTDst].u,
1790 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1791 }
1792 }
1793 else /* MMIO or invalid page: emulated in #PF handler. */
1794 {
1795 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1796 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1797 }
1798 }
1799 else
1800 {
1801 /*
1802 * 4/2MB page - lazy syncing shadow 4K pages.
1803 * (There are many causes of getting here, it's no longer only CSAM.)
1804 */
1805 /* Calculate the GC physical address of this 4KB shadow page. */
1806 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1807 /* Find ram range. */
1808 PPGMPAGE pPage;
1809 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1810 if (RT_SUCCESS(rc))
1811 {
1812 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1813
1814# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1815 /* Try to make the page writable if necessary. */
1816 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1817 && ( PGM_PAGE_IS_ZERO(pPage)
1818 || ( PdeSrc.n.u1Write
1819 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1820# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1821 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1822# endif
1823 )
1824 )
1825 )
1826 {
1827 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1828 AssertRC(rc);
1829 }
1830# endif
1831
1832 /*
1833 * Make shadow PTE entry.
1834 */
1835 SHWPTE PteDst;
1836 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1837 | PGM_PAGE_GET_HCPHYS(pPage);
1838 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1839 {
1840 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1841 PteDst.n.u1Write = 0;
1842 else
1843 PteDst.u = 0;
1844 }
1845
1846 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1847 if ( PteDst.n.u1Present
1848 && !pPTDst->a[iPTDst].n.u1Present)
1849 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1850
1851 /* Make sure only allocated pages are mapped writable. */
1852 if ( PteDst.n.u1Write
1853 && PteDst.n.u1Present
1854 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1855 {
1856 /* Still applies to shared pages. */
1857 Assert(!PGM_PAGE_IS_ZERO(pPage));
1858 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1859 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1860 }
1861
1862 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1863
1864 /*
1865 * If the page is not flagged as dirty and is writable, then make it read-only
1866 * at PD level, so we can set the dirty bit when the page is modified.
1867 *
1868 * ASSUMES that page access handlers are implemented on page table entry level.
1869 * Thus we will first catch the dirty access and set PDE.D and restart. If
1870 * there is an access handler, we'll trap again and let it work on the problem.
1871 */
1872 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1873 * As for invlpg, it simply frees the whole shadow PT.
1874 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1875 if ( !PdeSrc.b.u1Dirty
1876 && PdeSrc.b.u1Write)
1877 {
1878 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1879 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1880 PdeDst.n.u1Write = 0;
1881 }
1882 else
1883 {
1884 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1885 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1886 }
1887 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1888 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1889 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1890 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1891 }
1892 else
1893 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1894 }
1895# if defined(IN_RC)
1896 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1897 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1898# endif
1899 return VINF_SUCCESS;
1900 }
1901 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1902 }
1903 else
1904 {
1905 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1906 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1907 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1908 }
1909
1910 /*
1911 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1912 * Yea, I'm lazy.
1913 */
1914 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1915 ASMAtomicWriteSize(pPdeDst, 0);
1916
1917# if defined(IN_RC)
1918 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1919 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1920# endif
1921 PGM_INVL_VCPU_TLBS(pVCpu);
1922 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1923
1924#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1925 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1926 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1927 && !defined(IN_RC)
1928
1929# ifdef PGM_SYNC_N_PAGES
1930 /*
1931 * Get the shadow PDE, find the shadow page table in the pool.
1932 */
1933# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1934 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1935
1936# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1937 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1938
1939# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1940 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1941 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1942 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1943 X86PDEPAE PdeDst;
1944 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1945
1946 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1947 AssertRCSuccessReturn(rc, rc);
1948 Assert(pPDDst && pPdptDst);
1949 PdeDst = pPDDst->a[iPDDst];
1950# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1951 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1952 PEPTPD pPDDst;
1953 EPTPDE PdeDst;
1954
1955 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1956 if (rc != VINF_SUCCESS)
1957 {
1958 AssertRC(rc);
1959 return rc;
1960 }
1961 Assert(pPDDst);
1962 PdeDst = pPDDst->a[iPDDst];
1963# endif
1964 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1965 if (!PdeDst.n.u1Present)
1966 {
1967 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
1968 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1969 return VINF_SUCCESS; /* force the instruction to be executed again. */
1970 }
1971
1972 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
1973 if (PdeDst.n.u1Size)
1974 {
1975 Assert(HWACCMIsNestedPagingActive(pVM));
1976 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
1977 return VINF_SUCCESS;
1978 }
1979
1980 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1981 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1982
1983 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1984 if ( cPages > 1
1985 && !(uErr & X86_TRAP_PF_P)
1986 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1987 {
1988 /*
1989 * This code path is currently only taken when the caller is PGMTrap0eHandler
1990 * for non-present pages!
1991 *
1992 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1993 * deal with locality.
1994 */
1995 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1996 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1997 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1998 iPTDst = 0;
1999 else
2000 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2001 for (; iPTDst < iPTDstEnd; iPTDst++)
2002 {
2003 if (!pPTDst->a[iPTDst].n.u1Present)
2004 {
2005 GSTPTE PteSrc;
2006
2007 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2008
2009 /* Fake the page table entry */
2010 PteSrc.u = GCPtrCurPage;
2011 PteSrc.n.u1Present = 1;
2012 PteSrc.n.u1Dirty = 1;
2013 PteSrc.n.u1Accessed = 1;
2014 PteSrc.n.u1Write = 1;
2015 PteSrc.n.u1User = 1;
2016
2017 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2018
2019 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2020 GCPtrCurPage, PteSrc.n.u1Present,
2021 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2022 PteSrc.n.u1User & PdeSrc.n.u1User,
2023 (uint64_t)PteSrc.u,
2024 (uint64_t)pPTDst->a[iPTDst].u,
2025 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2026
2027 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2028 break;
2029 }
2030 else
2031 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2032 }
2033 }
2034 else
2035# endif /* PGM_SYNC_N_PAGES */
2036 {
2037 GSTPTE PteSrc;
2038 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2039 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2040
2041 /* Fake the page table entry */
2042 PteSrc.u = GCPtrCurPage;
2043 PteSrc.n.u1Present = 1;
2044 PteSrc.n.u1Dirty = 1;
2045 PteSrc.n.u1Accessed = 1;
2046 PteSrc.n.u1Write = 1;
2047 PteSrc.n.u1User = 1;
2048 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2049
2050 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2051 GCPtrPage, PteSrc.n.u1Present,
2052 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2053 PteSrc.n.u1User & PdeSrc.n.u1User,
2054 (uint64_t)PteSrc.u,
2055 (uint64_t)pPTDst->a[iPTDst].u,
2056 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2057 }
2058 return VINF_SUCCESS;
2059
2060#else
2061 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2062 return VERR_INTERNAL_ERROR;
2063#endif
2064}
2065
2066
2067#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2068/**
2069 * Investigate page fault and handle write protection page faults caused by
2070 * dirty bit tracking.
2071 *
2072 * @returns VBox status code.
2073 * @param pVCpu The VMCPU handle.
2074 * @param uErr Page fault error code.
2075 * @param pPdeSrc Guest page directory entry.
2076 * @param GCPtrPage Guest context page address.
2077 */
2078PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2079{
2080 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2081 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2082 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2083# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2084 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2085# endif
2086 unsigned uPageFaultLevel;
2087 int rc;
2088 PVM pVM = pVCpu->CTX_SUFF(pVM);
2089
2090 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2091
2092# if PGM_GST_TYPE == PGM_TYPE_PAE \
2093 || PGM_GST_TYPE == PGM_TYPE_AMD64
2094
2095# if PGM_GST_TYPE == PGM_TYPE_AMD64
2096 PX86PML4E pPml4eSrc;
2097 PX86PDPE pPdpeSrc;
2098
2099 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2100 Assert(pPml4eSrc);
2101
2102 /*
2103 * Real page fault? (PML4E level)
2104 */
2105 if ( (uErr & X86_TRAP_PF_RSVD)
2106 || !pPml4eSrc->n.u1Present
2107 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2108 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2109 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2110 )
2111 {
2112 uPageFaultLevel = 0;
2113 goto l_UpperLevelPageFault;
2114 }
2115 Assert(pPdpeSrc);
2116
2117# else /* PAE */
2118 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2119# endif /* PAE */
2120
2121 /*
2122 * Real page fault? (PDPE level)
2123 */
2124 if ( (uErr & X86_TRAP_PF_RSVD)
2125 || !pPdpeSrc->n.u1Present
2126# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2127 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2128 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2129 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2130# endif
2131 )
2132 {
2133 uPageFaultLevel = 1;
2134 goto l_UpperLevelPageFault;
2135 }
2136# endif
2137
2138 /*
2139 * Real page fault? (PDE level)
2140 */
2141 if ( (uErr & X86_TRAP_PF_RSVD)
2142 || !pPdeSrc->n.u1Present
2143 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2144# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2145 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2146# endif
2147 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2148 {
2149 uPageFaultLevel = 2;
2150 goto l_UpperLevelPageFault;
2151 }
2152
2153 /*
2154 * First check the easy case where the page directory has been marked read-only to track
2155 * the dirty bit of an emulated BIG page
2156 */
2157 if ( pPdeSrc->b.u1Size
2158# if PGM_GST_TYPE == PGM_TYPE_32BIT
2159 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2160# endif
2161 )
2162 {
2163 /* Mark guest page directory as accessed */
2164# if PGM_GST_TYPE == PGM_TYPE_AMD64
2165 pPml4eSrc->n.u1Accessed = 1;
2166 pPdpeSrc->lm.u1Accessed = 1;
2167# endif
2168 pPdeSrc->b.u1Accessed = 1;
2169
2170 /*
2171 * Only write protection page faults are relevant here.
2172 */
2173 if (fWriteFault)
2174 {
2175 /* Mark guest page directory as dirty (BIG page only). */
2176 pPdeSrc->b.u1Dirty = 1;
2177 }
2178 return VINF_SUCCESS;
2179 }
2180 /* else: 4KB page table */
2181
2182 /*
2183 * Map the guest page table.
2184 */
2185 PGSTPT pPTSrc;
2186 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2187 if (RT_SUCCESS(rc))
2188 {
2189 /*
2190 * Real page fault?
2191 */
2192 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2193 const GSTPTE PteSrc = *pPteSrc;
2194 if ( !PteSrc.n.u1Present
2195 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2196# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2197 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2198# endif
2199 || (fUserLevelFault && !PteSrc.n.u1User)
2200 )
2201 {
2202 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2203 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2204
2205 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2206 * See the 2nd case above as well.
2207 */
2208 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2209 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2210
2211 return VINF_EM_RAW_GUEST_TRAP;
2212 }
2213 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2214
2215 /*
2216 * Set the accessed bits in the page directory and the page table.
2217 */
2218# if PGM_GST_TYPE == PGM_TYPE_AMD64
2219 pPml4eSrc->n.u1Accessed = 1;
2220 pPdpeSrc->lm.u1Accessed = 1;
2221# endif
2222 pPdeSrc->n.u1Accessed = 1;
2223 pPteSrc->n.u1Accessed = 1;
2224
2225 /*
2226 * Only write protection page faults are relevant here.
2227 */
2228 if (fWriteFault)
2229 {
2230 /* Write access, so mark guest entry as dirty. */
2231# ifdef VBOX_WITH_STATISTICS
2232 if (!pPteSrc->n.u1Dirty)
2233 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2234 else
2235 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2236# endif
2237
2238 pPteSrc->n.u1Dirty = 1;
2239 }
2240 return VINF_SUCCESS;
2241 }
2242 AssertRC(rc);
2243 return rc;
2244
2245
2246l_UpperLevelPageFault:
2247 /*
2248 * Pagefault detected while checking the PML4E, PDPE or PDE.
2249 * Single exit handler to get rid of duplicate code paths.
2250 */
2251 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2252 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2253
2254 if ( 1
2255# if PGM_GST_TYPE == PGM_TYPE_AMD64
2256 && pPml4eSrc->n.u1Present
2257# endif
2258# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2259 && pPdpeSrc->n.u1Present
2260# endif
2261 && pPdeSrc->n.u1Present)
2262 {
2263 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2264 if ( pPdeSrc->b.u1Size
2265# if PGM_GST_TYPE == PGM_TYPE_32BIT
2266 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2267# endif
2268 )
2269 {
2270 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2271 }
2272 else
2273 {
2274 /*
2275 * Map the guest page table.
2276 */
2277 PGSTPT pPTSrc2;
2278 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2279 if (RT_SUCCESS(rc))
2280 {
2281 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2282 if (pPteSrc->n.u1Present)
2283 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2284 }
2285 AssertRC(rc);
2286 }
2287 }
2288 return VINF_EM_RAW_GUEST_TRAP;
2289}
2290
2291/**
2292 * Handle dirty bit tracking faults.
2293 *
2294 * @returns VBox status code.
2295 * @param pVCpu The VMCPU handle.
2296 * @param uErr Page fault error code.
2297 * @param pPdeSrc Guest page directory entry.
2298 * @param pPdeDst Shadow page directory entry.
2299 * @param GCPtrPage Guest context page address.
2300 */
2301PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2302{
2303# if PGM_GST_TYPE == PGM_TYPE_32BIT
2304 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2305# else
2306 const bool fBigPagesSupported = true;
2307# endif
2308 PVM pVM = pVCpu->CTX_SUFF(pVM);
2309 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2310
2311 Assert(PGMIsLockOwner(pVM));
2312
2313 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2314 {
2315 if ( pPdeDst->n.u1Present
2316 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2317 {
2318 SHWPDE PdeDst = *pPdeDst;
2319
2320 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2321 Assert(pPdeSrc->b.u1Write);
2322
2323 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2324 * fault again and take this path to only invalidate the entry.
2325 */
2326 PdeDst.n.u1Write = 1;
2327 PdeDst.n.u1Accessed = 1;
2328 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2329 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2330 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2331 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2332 }
2333# ifdef IN_RING0
2334 else
2335 /* Check for stale TLB entry; only applies to the SMP guest case. */
2336 if ( pVM->cCpus > 1
2337 && pPdeDst->n.u1Write
2338 && pPdeDst->n.u1Accessed)
2339 {
2340 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2341 if (pShwPage)
2342 {
2343 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2344 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2345 if ( pPteDst->n.u1Present
2346 && pPteDst->n.u1Write)
2347 {
2348 /* Stale TLB entry. */
2349 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2350 PGM_INVL_PG(pVCpu, GCPtrPage);
2351 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2352 }
2353 }
2354 }
2355# endif /* IN_RING0 */
2356 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2357 }
2358
2359 /*
2360 * Map the guest page table.
2361 */
2362 PGSTPT pPTSrc;
2363 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2364 if (RT_SUCCESS(rc))
2365 {
2366 if (pPdeDst->n.u1Present)
2367 {
2368 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2369 const GSTPTE PteSrc = *pPteSrc;
2370#ifndef IN_RING0
2371 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2372 * Our individual shadow handlers will provide more information and force a fatal exit.
2373 */
2374 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2375 {
2376 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2377 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2378 }
2379#endif
2380 /*
2381 * Map shadow page table.
2382 */
2383 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2384 if (pShwPage)
2385 {
2386 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2387 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2388 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2389 {
2390 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2391 {
2392 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2393 SHWPTE PteDst = *pPteDst;
2394
2395 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2396 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2397
2398 Assert(pPteSrc->n.u1Write);
2399
2400 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2401 * fault again and take this path to only invalidate the entry.
2402 */
2403 if (RT_LIKELY(pPage))
2404 {
2405 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2406 {
2407 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2408 PteDst.n.u1Write = 0;
2409 }
2410 else
2411 {
2412 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2413 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2414 {
2415 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2416 AssertRC(rc);
2417 }
2418 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2419 {
2420 PteDst.n.u1Write = 1;
2421 }
2422 else
2423 {
2424 /* Still applies to shared pages. */
2425 Assert(!PGM_PAGE_IS_ZERO(pPage));
2426 PteDst.n.u1Write = 0;
2427 }
2428 }
2429 }
2430 else
2431 PteDst.n.u1Write = 1;
2432
2433 PteDst.n.u1Dirty = 1;
2434 PteDst.n.u1Accessed = 1;
2435 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2436 ASMAtomicWriteSize(pPteDst, PteDst.u);
2437 PGM_INVL_PG(pVCpu, GCPtrPage);
2438 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2439 }
2440# ifdef IN_RING0
2441 else
2442 /* Check for stale TLB entry; only applies to the SMP guest case. */
2443 if ( pVM->cCpus > 1
2444 && pPteDst->n.u1Write == 1
2445 && pPteDst->n.u1Accessed == 1)
2446 {
2447 /* Stale TLB entry. */
2448 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2449 PGM_INVL_PG(pVCpu, GCPtrPage);
2450 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2451 }
2452# endif
2453 }
2454 }
2455 else
2456 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2457 }
2458 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2459 }
2460 AssertRC(rc);
2461 return rc;
2462}
2463#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2464
2465
2466/**
2467 * Sync a shadow page table.
2468 *
2469 * The shadow page table is not present. This includes the case where
2470 * there is a conflict with a mapping.
2471 *
2472 * @returns VBox status code.
2473 * @param pVCpu The VMCPU handle.
2474 * @param iPD Page directory index.
2475 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2476 * Assume this is a temporary mapping.
2477 * @param GCPtrPage GC Pointer of the page that caused the fault
2478 */
2479PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2480{
2481 PVM pVM = pVCpu->CTX_SUFF(pVM);
2482 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2483
2484 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2485#if 0 /* rarely useful; leave for debugging. */
2486 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2487#endif
2488 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2489
2490 Assert(PGMIsLocked(pVM));
2491
2492#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2493 || PGM_GST_TYPE == PGM_TYPE_PAE \
2494 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2495 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2496 && PGM_SHW_TYPE != PGM_TYPE_EPT
2497
2498 int rc = VINF_SUCCESS;
2499
2500 /*
2501 * Validate input a little bit.
2502 */
2503 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2504# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2505 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2506 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2507
2508 /* Fetch the pgm pool shadow descriptor. */
2509 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2510 Assert(pShwPde);
2511
2512# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2513 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2514 PPGMPOOLPAGE pShwPde = NULL;
2515 PX86PDPAE pPDDst;
2516 PSHWPDE pPdeDst;
2517
2518 /* Fetch the pgm pool shadow descriptor. */
2519 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2520 AssertRCSuccessReturn(rc, rc);
2521 Assert(pShwPde);
2522
2523 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2524 pPdeDst = &pPDDst->a[iPDDst];
2525
2526# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2527 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2528 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2529 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2530 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2531 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2532 AssertRCSuccessReturn(rc, rc);
2533 Assert(pPDDst);
2534 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2535# endif
2536 SHWPDE PdeDst = *pPdeDst;
2537
2538# if PGM_GST_TYPE == PGM_TYPE_AMD64
2539 /* Fetch the pgm pool shadow descriptor. */
2540 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2541 Assert(pShwPde);
2542# endif
2543
2544# ifndef PGM_WITHOUT_MAPPINGS
2545 /*
2546 * Check for conflicts.
2547 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2548 * HC: Simply resolve the conflict.
2549 */
2550 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2551 {
2552 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2553# ifndef IN_RING3
2554 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2555 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2556 return VERR_ADDRESS_CONFLICT;
2557# else
2558 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2559 Assert(pMapping);
2560# if PGM_GST_TYPE == PGM_TYPE_32BIT
2561 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2562# elif PGM_GST_TYPE == PGM_TYPE_PAE
2563 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2564# else
2565 AssertFailed(); /* can't happen for amd64 */
2566# endif
2567 if (RT_FAILURE(rc))
2568 {
2569 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2570 return rc;
2571 }
2572 PdeDst = *pPdeDst;
2573# endif
2574 }
2575# endif /* !PGM_WITHOUT_MAPPINGS */
2576 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2577
2578# if defined(IN_RC)
2579 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2580 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2581# endif
2582
2583 /*
2584 * Sync page directory entry.
2585 */
2586 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2587 if (PdeSrc.n.u1Present)
2588 {
2589 /*
2590 * Allocate & map the page table.
2591 */
2592 PSHWPT pPTDst;
2593# if PGM_GST_TYPE == PGM_TYPE_32BIT
2594 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2595# else
2596 const bool fPageTable = !PdeSrc.b.u1Size;
2597# endif
2598 PPGMPOOLPAGE pShwPage;
2599 RTGCPHYS GCPhys;
2600 if (fPageTable)
2601 {
2602 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2603# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2604 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2605 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2606# endif
2607 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2608 }
2609 else
2610 {
2611 PGMPOOLACCESS enmAccess;
2612# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2613 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2614# else
2615 const bool fNoExecute = false;
2616# endif
2617
2618 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2619# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2620 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2621 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2622# endif
2623 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2624 if (PdeSrc.n.u1User)
2625 {
2626 if (PdeSrc.n.u1Write)
2627 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2628 else
2629 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2630 }
2631 else
2632 {
2633 if (PdeSrc.n.u1Write)
2634 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2635 else
2636 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2637 }
2638 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2639 }
2640 if (rc == VINF_SUCCESS)
2641 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2642 else if (rc == VINF_PGM_CACHED_PAGE)
2643 {
2644 /*
2645 * The PT was cached, just hook it up.
2646 */
2647 if (fPageTable)
2648 PdeDst.u = pShwPage->Core.Key
2649 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2650 else
2651 {
2652 PdeDst.u = pShwPage->Core.Key
2653 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2654 /* (see explanation and assumptions further down.) */
2655 if ( !PdeSrc.b.u1Dirty
2656 && PdeSrc.b.u1Write)
2657 {
2658 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2659 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2660 PdeDst.b.u1Write = 0;
2661 }
2662 }
2663 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2664# if defined(IN_RC)
2665 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2666# endif
2667 return VINF_SUCCESS;
2668 }
2669 else if (rc == VERR_PGM_POOL_FLUSHED)
2670 {
2671 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2672# if defined(IN_RC)
2673 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2674# endif
2675 return VINF_PGM_SYNC_CR3;
2676 }
2677 else
2678 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2679 PdeDst.u &= X86_PDE_AVL_MASK;
2680 PdeDst.u |= pShwPage->Core.Key;
2681
2682 /*
2683 * Page directory has been accessed (this is a fault situation, remember).
2684 */
2685 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2686 if (fPageTable)
2687 {
2688 /*
2689 * Page table - 4KB.
2690 *
2691 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2692 */
2693 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2694 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2695 PGSTPT pPTSrc;
2696 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2697 if (RT_SUCCESS(rc))
2698 {
2699 /*
2700 * Start by syncing the page directory entry so CSAM's TLB trick works.
2701 */
2702 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2703 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2704 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2705# if defined(IN_RC)
2706 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2707# endif
2708
2709 /*
2710 * Directory/page user or supervisor privilege: (same goes for read/write)
2711 *
2712 * Directory Page Combined
2713 * U/S U/S U/S
2714 * 0 0 0
2715 * 0 1 0
2716 * 1 0 0
2717 * 1 1 1
2718 *
2719 * Simple AND operation. Table listed for completeness.
2720 *
2721 */
2722 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2723# ifdef PGM_SYNC_N_PAGES
2724 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2725 unsigned iPTDst = iPTBase;
2726 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2727 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2728 iPTDst = 0;
2729 else
2730 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2731# else /* !PGM_SYNC_N_PAGES */
2732 unsigned iPTDst = 0;
2733 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2734# endif /* !PGM_SYNC_N_PAGES */
2735# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2736 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2737 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2738# else
2739 const unsigned offPTSrc = 0;
2740# endif
2741 for (; iPTDst < iPTDstEnd; iPTDst++)
2742 {
2743 const unsigned iPTSrc = iPTDst + offPTSrc;
2744 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2745
2746 if (PteSrc.n.u1Present) /* we've already cleared it above */
2747 {
2748# ifndef IN_RING0
2749 /*
2750 * Assuming kernel code will be marked as supervisor - and not as user level
2751 * and executed using a conforming code selector - And marked as readonly.
2752 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2753 */
2754 PPGMPAGE pPage;
2755 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2756 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2757 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2758 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2759 )
2760# endif
2761 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2762 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2763 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2764 PteSrc.n.u1Present,
2765 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2766 PteSrc.n.u1User & PdeSrc.n.u1User,
2767 (uint64_t)PteSrc.u,
2768 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2769 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2770 }
2771 } /* for PTEs */
2772 }
2773 }
2774 else
2775 {
2776 /*
2777 * Big page - 2/4MB.
2778 *
2779 * We'll walk the ram range list in parallel and optimize lookups.
2780 * We will only sync on shadow page table at a time.
2781 */
2782 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2783
2784 /**
2785 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2786 */
2787
2788 /*
2789 * Start by syncing the page directory entry.
2790 */
2791 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2792 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2793
2794 /*
2795 * If the page is not flagged as dirty and is writable, then make it read-only
2796 * at PD level, so we can set the dirty bit when the page is modified.
2797 *
2798 * ASSUMES that page access handlers are implemented on page table entry level.
2799 * Thus we will first catch the dirty access and set PDE.D and restart. If
2800 * there is an access handler, we'll trap again and let it work on the problem.
2801 */
2802 /** @todo move the above stuff to a section in the PGM documentation. */
2803 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2804 if ( !PdeSrc.b.u1Dirty
2805 && PdeSrc.b.u1Write)
2806 {
2807 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2808 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2809 PdeDst.b.u1Write = 0;
2810 }
2811 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2812# if defined(IN_RC)
2813 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2814# endif
2815
2816 /*
2817 * Fill the shadow page table.
2818 */
2819 /* Get address and flags from the source PDE. */
2820 SHWPTE PteDstBase;
2821 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2822
2823 /* Loop thru the entries in the shadow PT. */
2824 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2825 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2826 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2827 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2828 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2829 unsigned iPTDst = 0;
2830 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2831 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2832 {
2833 /* Advance ram range list. */
2834 while (pRam && GCPhys > pRam->GCPhysLast)
2835 pRam = pRam->CTX_SUFF(pNext);
2836 if (pRam && GCPhys >= pRam->GCPhys)
2837 {
2838 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2839 do
2840 {
2841 /* Make shadow PTE. */
2842 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2843 SHWPTE PteDst;
2844
2845# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2846 /* Try to make the page writable if necessary. */
2847 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2848 && ( PGM_PAGE_IS_ZERO(pPage)
2849 || ( PteDstBase.n.u1Write
2850 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2851# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2852 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2853# endif
2854 && !PGM_PAGE_IS_BALLOONED(pPage))
2855 )
2856 )
2857 {
2858 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2859 AssertRCReturn(rc, rc);
2860 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2861 break;
2862 }
2863# endif
2864
2865 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2866 {
2867 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2868 {
2869 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2870 PteDst.n.u1Write = 0;
2871 }
2872 else
2873 PteDst.u = 0;
2874 }
2875 else
2876 if (PGM_PAGE_IS_BALLOONED(pPage))
2877 {
2878 /* Skip ballooned pages. */
2879 PteDst.u = 0;
2880 }
2881# ifndef IN_RING0
2882 /*
2883 * Assuming kernel code will be marked as supervisor and not as user level and executed
2884 * using a conforming code selector. Don't check for readonly, as that implies the whole
2885 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2886 */
2887 else if ( !PdeSrc.n.u1User
2888 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2889 PteDst.u = 0;
2890# endif
2891 else
2892 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2893
2894 /* Only map writable pages writable. */
2895 if ( PteDst.n.u1Write
2896 && PteDst.n.u1Present
2897 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2898 {
2899 /* Still applies to shared pages. */
2900 Assert(!PGM_PAGE_IS_ZERO(pPage));
2901 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2902 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2903 }
2904
2905 if (PteDst.n.u1Present)
2906 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2907
2908 /* commit it */
2909 pPTDst->a[iPTDst] = PteDst;
2910 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2911 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2912 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2913
2914 /* advance */
2915 GCPhys += PAGE_SIZE;
2916 iHCPage++;
2917 iPTDst++;
2918 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2919 && GCPhys <= pRam->GCPhysLast);
2920 }
2921 else if (pRam)
2922 {
2923 Log(("Invalid pages at %RGp\n", GCPhys));
2924 do
2925 {
2926 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2927 GCPhys += PAGE_SIZE;
2928 iPTDst++;
2929 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2930 && GCPhys < pRam->GCPhys);
2931 }
2932 else
2933 {
2934 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2935 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2936 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2937 }
2938 } /* while more PTEs */
2939 } /* 4KB / 4MB */
2940 }
2941 else
2942 AssertRelease(!PdeDst.n.u1Present);
2943
2944 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2945 if (RT_FAILURE(rc))
2946 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2947 return rc;
2948
2949#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2950 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2951 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2952 && !defined(IN_RC)
2953
2954 /*
2955 * Validate input a little bit.
2956 */
2957 int rc = VINF_SUCCESS;
2958# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2959 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2960 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2961
2962 /* Fetch the pgm pool shadow descriptor. */
2963 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2964 Assert(pShwPde);
2965
2966# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2967 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2968 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2969 PX86PDPAE pPDDst;
2970 PSHWPDE pPdeDst;
2971
2972 /* Fetch the pgm pool shadow descriptor. */
2973 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2974 AssertRCSuccessReturn(rc, rc);
2975 Assert(pShwPde);
2976
2977 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2978 pPdeDst = &pPDDst->a[iPDDst];
2979
2980# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2981 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2982 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2983 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2984 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
2985 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2986 AssertRCSuccessReturn(rc, rc);
2987 Assert(pPDDst);
2988 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2989
2990 /* Fetch the pgm pool shadow descriptor. */
2991 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2992 Assert(pShwPde);
2993
2994# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2995 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2996 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2997 PEPTPD pPDDst;
2998 PEPTPDPT pPdptDst;
2999
3000 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3001 if (rc != VINF_SUCCESS)
3002 {
3003 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3004 AssertRC(rc);
3005 return rc;
3006 }
3007 Assert(pPDDst);
3008 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3009
3010 /* Fetch the pgm pool shadow descriptor. */
3011 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3012 Assert(pShwPde);
3013# endif
3014 SHWPDE PdeDst = *pPdeDst;
3015
3016 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3017 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3018
3019# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
3020# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
3021 if (HWACCMIsNestedPagingActive(pVM))
3022# endif
3023 {
3024 PPGMPAGE pPage;
3025
3026 /* Check if we allocated a big page before for this 2 MB range. */
3027 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3028 if (RT_SUCCESS(rc))
3029 {
3030 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3031
3032 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3033 {
3034 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3035 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3036 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3037 }
3038 else
3039 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3040 {
3041 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3042 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3043 if (RT_SUCCESS(rc))
3044 {
3045 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3046 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3047 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3048 }
3049 }
3050 else
3051 if (PGMIsUsingLargePages(pVM))
3052 {
3053 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3054 if (RT_SUCCESS(rc))
3055 {
3056 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3057 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3058 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3059 }
3060 else
3061 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3062 }
3063
3064 if (HCPhys != NIL_RTHCPHYS)
3065 {
3066 PdeDst.u &= X86_PDE_AVL_MASK;
3067 PdeDst.u |= HCPhys;
3068 PdeDst.n.u1Present = 1;
3069 PdeDst.n.u1Write = 1;
3070 PdeDst.b.u1Size = 1;
3071# if PGM_SHW_TYPE == PGM_TYPE_EPT
3072 PdeDst.n.u1Execute = 1;
3073 PdeDst.b.u1IgnorePAT = 1;
3074 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3075# else
3076 PdeDst.n.u1User = 1;
3077# endif
3078 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3079
3080 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3081 /* Add a reference to the first page only. */
3082 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3083
3084 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3085 return VINF_SUCCESS;
3086 }
3087 }
3088 }
3089# endif /* HC_ARCH_BITS == 64 */
3090
3091 GSTPDE PdeSrc;
3092 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3093 PdeSrc.n.u1Present = 1;
3094 PdeSrc.n.u1Write = 1;
3095 PdeSrc.n.u1Accessed = 1;
3096 PdeSrc.n.u1User = 1;
3097
3098 /*
3099 * Allocate & map the page table.
3100 */
3101 PSHWPT pPTDst;
3102 PPGMPOOLPAGE pShwPage;
3103 RTGCPHYS GCPhys;
3104
3105 /* Virtual address = physical address */
3106 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3107 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3108
3109 if ( rc == VINF_SUCCESS
3110 || rc == VINF_PGM_CACHED_PAGE)
3111 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3112 else
3113 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3114
3115 PdeDst.u &= X86_PDE_AVL_MASK;
3116 PdeDst.u |= pShwPage->Core.Key;
3117 PdeDst.n.u1Present = 1;
3118 PdeDst.n.u1Write = 1;
3119# if PGM_SHW_TYPE == PGM_TYPE_EPT
3120 PdeDst.n.u1Execute = 1;
3121# else
3122 PdeDst.n.u1User = 1;
3123 PdeDst.n.u1Accessed = 1;
3124# endif
3125 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3126
3127 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3128 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3129 return rc;
3130
3131#else
3132 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3133 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3134 return VERR_INTERNAL_ERROR;
3135#endif
3136}
3137
3138
3139
3140/**
3141 * Prefetch a page/set of pages.
3142 *
3143 * Typically used to sync commonly used pages before entering raw mode
3144 * after a CR3 reload.
3145 *
3146 * @returns VBox status code.
3147 * @param pVCpu The VMCPU handle.
3148 * @param GCPtrPage Page to invalidate.
3149 */
3150PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3151{
3152#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3153 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3154 /*
3155 * Check that all Guest levels thru the PDE are present, getting the
3156 * PD and PDE in the processes.
3157 */
3158 int rc = VINF_SUCCESS;
3159# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3160# if PGM_GST_TYPE == PGM_TYPE_32BIT
3161 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3162 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3163# elif PGM_GST_TYPE == PGM_TYPE_PAE
3164 unsigned iPDSrc;
3165 X86PDPE PdpeSrc;
3166 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3167 if (!pPDSrc)
3168 return VINF_SUCCESS; /* not present */
3169# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3170 unsigned iPDSrc;
3171 PX86PML4E pPml4eSrc;
3172 X86PDPE PdpeSrc;
3173 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3174 if (!pPDSrc)
3175 return VINF_SUCCESS; /* not present */
3176# endif
3177 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3178# else
3179 PGSTPD pPDSrc = NULL;
3180 const unsigned iPDSrc = 0;
3181 GSTPDE PdeSrc;
3182
3183 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3184 PdeSrc.n.u1Present = 1;
3185 PdeSrc.n.u1Write = 1;
3186 PdeSrc.n.u1Accessed = 1;
3187 PdeSrc.n.u1User = 1;
3188# endif
3189
3190 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3191 {
3192 PVM pVM = pVCpu->CTX_SUFF(pVM);
3193 pgmLock(pVM);
3194
3195# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3196 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3197# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3198 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3199 PX86PDPAE pPDDst;
3200 X86PDEPAE PdeDst;
3201# if PGM_GST_TYPE != PGM_TYPE_PAE
3202 X86PDPE PdpeSrc;
3203
3204 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3205 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3206# endif
3207 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3208 if (rc != VINF_SUCCESS)
3209 {
3210 pgmUnlock(pVM);
3211 AssertRC(rc);
3212 return rc;
3213 }
3214 Assert(pPDDst);
3215 PdeDst = pPDDst->a[iPDDst];
3216
3217# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3218 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3219 PX86PDPAE pPDDst;
3220 X86PDEPAE PdeDst;
3221
3222# if PGM_GST_TYPE == PGM_TYPE_PROT
3223 /* AMD-V nested paging */
3224 X86PML4E Pml4eSrc;
3225 X86PDPE PdpeSrc;
3226 PX86PML4E pPml4eSrc = &Pml4eSrc;
3227
3228 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3229 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3230 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3231# endif
3232
3233 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3234 if (rc != VINF_SUCCESS)
3235 {
3236 pgmUnlock(pVM);
3237 AssertRC(rc);
3238 return rc;
3239 }
3240 Assert(pPDDst);
3241 PdeDst = pPDDst->a[iPDDst];
3242# endif
3243 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3244 {
3245 if (!PdeDst.n.u1Present)
3246 {
3247 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3248 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3249 }
3250 else
3251 {
3252 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3253 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3254 * makes no sense to prefetch more than one page.
3255 */
3256 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3257 if (RT_SUCCESS(rc))
3258 rc = VINF_SUCCESS;
3259 }
3260 }
3261 pgmUnlock(pVM);
3262 }
3263 return rc;
3264
3265#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3266 return VINF_SUCCESS; /* ignore */
3267#endif
3268}
3269
3270
3271
3272
3273/**
3274 * Syncs a page during a PGMVerifyAccess() call.
3275 *
3276 * @returns VBox status code (informational included).
3277 * @param pVCpu The VMCPU handle.
3278 * @param GCPtrPage The address of the page to sync.
3279 * @param fPage The effective guest page flags.
3280 * @param uErr The trap error code.
3281 */
3282PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3283{
3284 PVM pVM = pVCpu->CTX_SUFF(pVM);
3285
3286 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3287
3288 Assert(!HWACCMIsNestedPagingActive(pVM));
3289#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_TYPE_AMD64) \
3290 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3291
3292# ifndef IN_RING0
3293 if (!(fPage & X86_PTE_US))
3294 {
3295 /*
3296 * Mark this page as safe.
3297 */
3298 /** @todo not correct for pages that contain both code and data!! */
3299 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3300 CSAMMarkPage(pVM, GCPtrPage, true);
3301 }
3302# endif
3303
3304 /*
3305 * Get guest PD and index.
3306 */
3307# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3308# if PGM_GST_TYPE == PGM_TYPE_32BIT
3309 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3310 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3311# elif PGM_GST_TYPE == PGM_TYPE_PAE
3312 unsigned iPDSrc = 0;
3313 X86PDPE PdpeSrc;
3314 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3315
3316 if (pPDSrc)
3317 {
3318 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3319 return VINF_EM_RAW_GUEST_TRAP;
3320 }
3321# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3322 unsigned iPDSrc;
3323 PX86PML4E pPml4eSrc;
3324 X86PDPE PdpeSrc;
3325 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3326 if (!pPDSrc)
3327 {
3328 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3329 return VINF_EM_RAW_GUEST_TRAP;
3330 }
3331# endif
3332# else
3333 PGSTPD pPDSrc = NULL;
3334 const unsigned iPDSrc = 0;
3335# endif
3336 int rc = VINF_SUCCESS;
3337
3338 pgmLock(pVM);
3339
3340 /*
3341 * First check if the shadow pd is present.
3342 */
3343# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3344 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3345# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3346 PX86PDEPAE pPdeDst;
3347 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3348 PX86PDPAE pPDDst;
3349# if PGM_GST_TYPE != PGM_TYPE_PAE
3350 X86PDPE PdpeSrc;
3351
3352 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3353 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3354# endif
3355 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3356 if (rc != VINF_SUCCESS)
3357 {
3358 pgmUnlock(pVM);
3359 AssertRC(rc);
3360 return rc;
3361 }
3362 Assert(pPDDst);
3363 pPdeDst = &pPDDst->a[iPDDst];
3364
3365# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3366 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3367 PX86PDPAE pPDDst;
3368 PX86PDEPAE pPdeDst;
3369
3370# if PGM_GST_TYPE == PGM_TYPE_PROT
3371 /* AMD-V nested paging */
3372 X86PML4E Pml4eSrc;
3373 X86PDPE PdpeSrc;
3374 PX86PML4E pPml4eSrc = &Pml4eSrc;
3375
3376 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3377 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3378 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3379# endif
3380
3381 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3382 if (rc != VINF_SUCCESS)
3383 {
3384 pgmUnlock(pVM);
3385 AssertRC(rc);
3386 return rc;
3387 }
3388 Assert(pPDDst);
3389 pPdeDst = &pPDDst->a[iPDDst];
3390# endif
3391
3392# if defined(IN_RC)
3393 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3394 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3395# endif
3396
3397 if (!pPdeDst->n.u1Present)
3398 {
3399 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3400 if (rc != VINF_SUCCESS)
3401 {
3402# if defined(IN_RC)
3403 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3404 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3405# endif
3406 pgmUnlock(pVM);
3407 AssertRC(rc);
3408 return rc;
3409 }
3410 }
3411
3412# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3413 /* Check for dirty bit fault */
3414 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3415 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3416 Log(("PGMVerifyAccess: success (dirty)\n"));
3417 else
3418 {
3419 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3420# else
3421 {
3422 GSTPDE PdeSrc;
3423 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3424 PdeSrc.n.u1Present = 1;
3425 PdeSrc.n.u1Write = 1;
3426 PdeSrc.n.u1Accessed = 1;
3427 PdeSrc.n.u1User = 1;
3428
3429# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3430 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3431 if (uErr & X86_TRAP_PF_US)
3432 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3433 else /* supervisor */
3434 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3435
3436 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3437 if (RT_SUCCESS(rc))
3438 {
3439 /* Page was successfully synced */
3440 Log2(("PGMVerifyAccess: success (sync)\n"));
3441 rc = VINF_SUCCESS;
3442 }
3443 else
3444 {
3445 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3446 rc = VINF_EM_RAW_GUEST_TRAP;
3447 }
3448 }
3449# if defined(IN_RC)
3450 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3451 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3452# endif
3453 pgmUnlock(pVM);
3454 return rc;
3455
3456#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3457
3458 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3459 return VERR_INTERNAL_ERROR;
3460#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3461}
3462
3463
3464/**
3465 * Syncs the paging hierarchy starting at CR3.
3466 *
3467 * @returns VBox status code, no specials.
3468 * @param pVCpu The VMCPU handle.
3469 * @param cr0 Guest context CR0 register
3470 * @param cr3 Guest context CR3 register
3471 * @param cr4 Guest context CR4 register
3472 * @param fGlobal Including global page directories or not
3473 */
3474PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3475{
3476 PVM pVM = pVCpu->CTX_SUFF(pVM);
3477
3478 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3479
3480#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3481
3482 pgmLock(pVM);
3483
3484# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3485 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3486 if (pPool->cDirtyPages)
3487 pgmPoolResetDirtyPages(pVM);
3488# endif
3489
3490 /*
3491 * Update page access handlers.
3492 * The virtual are always flushed, while the physical are only on demand.
3493 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3494 * have to look into that later because it will have a bad influence on the performance.
3495 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3496 * bird: Yes, but that won't work for aliases.
3497 */
3498 /** @todo this MUST go away. See #1557. */
3499 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3500 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3501 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3502 pgmUnlock(pVM);
3503#endif /* !NESTED && !EPT */
3504
3505#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3506 /*
3507 * Nested / EPT - almost no work.
3508 */
3509 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3510 return VINF_SUCCESS;
3511
3512#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3513 /*
3514 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3515 * out the shadow parts when the guest modifies its tables.
3516 */
3517 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3518 return VINF_SUCCESS;
3519
3520#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3521
3522# ifndef PGM_WITHOUT_MAPPINGS
3523 /*
3524 * Check for and resolve conflicts with our guest mappings if they
3525 * are enabled and not fixed.
3526 */
3527 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3528 {
3529 int rc = pgmMapResolveConflicts(pVM);
3530 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3531 if (rc == VINF_PGM_SYNC_CR3)
3532 {
3533 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3534 return VINF_PGM_SYNC_CR3;
3535 }
3536 }
3537# else
3538 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3539# endif
3540 return VINF_SUCCESS;
3541#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3542}
3543
3544
3545
3546
3547#ifdef VBOX_STRICT
3548#ifdef IN_RC
3549# undef AssertMsgFailed
3550# define AssertMsgFailed Log
3551#endif
3552#ifdef IN_RING3
3553# include <VBox/dbgf.h>
3554
3555/**
3556 * Dumps a page table hierarchy use only physical addresses and cr4/lm flags.
3557 *
3558 * @returns VBox status code (VINF_SUCCESS).
3559 * @param cr3 The root of the hierarchy.
3560 * @param crr The cr4, only PAE and PSE is currently used.
3561 * @param fLongMode Set if long mode, false if not long mode.
3562 * @param cMaxDepth Number of levels to dump.
3563 * @param pHlp Pointer to the output functions.
3564 */
3565RT_C_DECLS_BEGIN
3566VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3567RT_C_DECLS_END
3568
3569#endif
3570
3571/**
3572 * Checks that the shadow page table is in sync with the guest one.
3573 *
3574 * @returns The number of errors.
3575 * @param pVM The virtual machine.
3576 * @param pVCpu The VMCPU handle.
3577 * @param cr3 Guest context CR3 register
3578 * @param cr4 Guest context CR4 register
3579 * @param GCPtr Where to start. Defaults to 0.
3580 * @param cb How much to check. Defaults to everything.
3581 */
3582PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3583{
3584#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3585 return 0;
3586#else
3587 unsigned cErrors = 0;
3588 PVM pVM = pVCpu->CTX_SUFF(pVM);
3589 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3590
3591#if PGM_GST_TYPE == PGM_TYPE_PAE
3592 /** @todo currently broken; crashes below somewhere */
3593 AssertFailed();
3594#endif
3595
3596#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3597 || PGM_GST_TYPE == PGM_TYPE_PAE \
3598 || PGM_GST_TYPE == PGM_TYPE_AMD64
3599
3600# if PGM_GST_TYPE == PGM_TYPE_32BIT
3601 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3602# else
3603 bool fBigPagesSupported = true;
3604# endif
3605 PPGMCPU pPGM = &pVCpu->pgm.s;
3606 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3607 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3608# ifndef IN_RING0
3609 RTHCPHYS HCPhys; /* general usage. */
3610# endif
3611 int rc;
3612
3613 /*
3614 * Check that the Guest CR3 and all its mappings are correct.
3615 */
3616 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3617 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3618 false);
3619# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3620# if PGM_GST_TYPE == PGM_TYPE_32BIT
3621 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3622# else
3623 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3624# endif
3625 AssertRCReturn(rc, 1);
3626 HCPhys = NIL_RTHCPHYS;
3627 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3628 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3629# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3630 pgmGstGet32bitPDPtr(pPGM);
3631 RTGCPHYS GCPhys;
3632 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3633 AssertRCReturn(rc, 1);
3634 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3635# endif
3636# endif /* !IN_RING0 */
3637
3638 /*
3639 * Get and check the Shadow CR3.
3640 */
3641# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3642 unsigned cPDEs = X86_PG_ENTRIES;
3643 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3644# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3645# if PGM_GST_TYPE == PGM_TYPE_32BIT
3646 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3647# else
3648 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3649# endif
3650 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3651# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3652 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3653 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3654# endif
3655 if (cb != ~(RTGCPTR)0)
3656 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3657
3658/** @todo call the other two PGMAssert*() functions. */
3659
3660# if PGM_GST_TYPE == PGM_TYPE_AMD64
3661 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3662
3663 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3664 {
3665 PPGMPOOLPAGE pShwPdpt = NULL;
3666 PX86PML4E pPml4eSrc;
3667 PX86PML4E pPml4eDst;
3668 RTGCPHYS GCPhysPdptSrc;
3669
3670 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3671 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3672
3673 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3674 if (!pPml4eDst->n.u1Present)
3675 {
3676 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3677 continue;
3678 }
3679
3680 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3681 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3682
3683 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3684 {
3685 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3686 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3687 cErrors++;
3688 continue;
3689 }
3690
3691 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3692 {
3693 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3694 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3695 cErrors++;
3696 continue;
3697 }
3698
3699 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3700 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3701 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3702 {
3703 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3704 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3705 cErrors++;
3706 continue;
3707 }
3708# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3709 {
3710# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3711
3712# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3713 /*
3714 * Check the PDPTEs too.
3715 */
3716 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3717
3718 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3719 {
3720 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3721 PPGMPOOLPAGE pShwPde = NULL;
3722 PX86PDPE pPdpeDst;
3723 RTGCPHYS GCPhysPdeSrc;
3724# if PGM_GST_TYPE == PGM_TYPE_PAE
3725 X86PDPE PdpeSrc;
3726 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3727 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3728# else
3729 PX86PML4E pPml4eSrcIgn;
3730 X86PDPE PdpeSrc;
3731 PX86PDPT pPdptDst;
3732 PX86PDPAE pPDDst;
3733 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3734
3735 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3736 if (rc != VINF_SUCCESS)
3737 {
3738 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3739 GCPtr += 512 * _2M;
3740 continue; /* next PDPTE */
3741 }
3742 Assert(pPDDst);
3743# endif
3744 Assert(iPDSrc == 0);
3745
3746 pPdpeDst = &pPdptDst->a[iPdpt];
3747
3748 if (!pPdpeDst->n.u1Present)
3749 {
3750 GCPtr += 512 * _2M;
3751 continue; /* next PDPTE */
3752 }
3753
3754 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3755 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3756
3757 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3758 {
3759 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3760 GCPtr += 512 * _2M;
3761 cErrors++;
3762 continue;
3763 }
3764
3765 if (GCPhysPdeSrc != pShwPde->GCPhys)
3766 {
3767# if PGM_GST_TYPE == PGM_TYPE_AMD64
3768 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3769# else
3770 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3771# endif
3772 GCPtr += 512 * _2M;
3773 cErrors++;
3774 continue;
3775 }
3776
3777# if PGM_GST_TYPE == PGM_TYPE_AMD64
3778 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3779 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3780 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3781 {
3782 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3783 GCPtr += 512 * _2M;
3784 cErrors++;
3785 continue;
3786 }
3787# endif
3788
3789# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3790 {
3791# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3792# if PGM_GST_TYPE == PGM_TYPE_32BIT
3793 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3794# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3795 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3796# endif
3797# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3798 /*
3799 * Iterate the shadow page directory.
3800 */
3801 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3802 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3803
3804 for (;
3805 iPDDst < cPDEs;
3806 iPDDst++, GCPtr += cIncrement)
3807 {
3808# if PGM_SHW_TYPE == PGM_TYPE_PAE
3809 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3810# else
3811 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3812# endif
3813 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3814 {
3815 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3816 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3817 {
3818 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3819 cErrors++;
3820 continue;
3821 }
3822 }
3823 else if ( (PdeDst.u & X86_PDE_P)
3824 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3825 )
3826 {
3827 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3828 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3829 if (!pPoolPage)
3830 {
3831 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3832 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3833 cErrors++;
3834 continue;
3835 }
3836 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3837
3838 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3839 {
3840 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3841 GCPtr, (uint64_t)PdeDst.u));
3842 cErrors++;
3843 }
3844
3845 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3846 {
3847 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3848 GCPtr, (uint64_t)PdeDst.u));
3849 cErrors++;
3850 }
3851
3852 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3853 if (!PdeSrc.n.u1Present)
3854 {
3855 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3856 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3857 cErrors++;
3858 continue;
3859 }
3860
3861 if ( !PdeSrc.b.u1Size
3862 || !fBigPagesSupported)
3863 {
3864 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3865# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3866 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3867# endif
3868 }
3869 else
3870 {
3871# if PGM_GST_TYPE == PGM_TYPE_32BIT
3872 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3873 {
3874 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3875 GCPtr, (uint64_t)PdeSrc.u));
3876 cErrors++;
3877 continue;
3878 }
3879# endif
3880 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3881# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3882 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3883# endif
3884 }
3885
3886 if ( pPoolPage->enmKind
3887 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3888 {
3889 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3890 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3891 cErrors++;
3892 }
3893
3894 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3895 if (!pPhysPage)
3896 {
3897 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3898 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3899 cErrors++;
3900 continue;
3901 }
3902
3903 if (GCPhysGst != pPoolPage->GCPhys)
3904 {
3905 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3906 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3907 cErrors++;
3908 continue;
3909 }
3910
3911 if ( !PdeSrc.b.u1Size
3912 || !fBigPagesSupported)
3913 {
3914 /*
3915 * Page Table.
3916 */
3917 const GSTPT *pPTSrc;
3918 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3919 if (RT_FAILURE(rc))
3920 {
3921 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3922 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3923 cErrors++;
3924 continue;
3925 }
3926 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3927 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3928 {
3929 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3930 // (This problem will go away when/if we shadow multiple CR3s.)
3931 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3932 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3933 cErrors++;
3934 continue;
3935 }
3936 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3937 {
3938 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3939 GCPtr, (uint64_t)PdeDst.u));
3940 cErrors++;
3941 continue;
3942 }
3943
3944 /* iterate the page table. */
3945# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3946 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3947 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3948# else
3949 const unsigned offPTSrc = 0;
3950# endif
3951 for (unsigned iPT = 0, off = 0;
3952 iPT < RT_ELEMENTS(pPTDst->a);
3953 iPT++, off += PAGE_SIZE)
3954 {
3955 const SHWPTE PteDst = pPTDst->a[iPT];
3956
3957 /* skip not-present entries. */
3958 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3959 continue;
3960 Assert(PteDst.n.u1Present);
3961
3962 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3963 if (!PteSrc.n.u1Present)
3964 {
3965# ifdef IN_RING3
3966 PGMAssertHandlerAndFlagsInSync(pVM);
3967 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3968# endif
3969 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3970 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3971 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3972 cErrors++;
3973 continue;
3974 }
3975
3976 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3977# if 1 /** @todo sync accessed bit properly... */
3978 fIgnoreFlags |= X86_PTE_A;
3979# endif
3980
3981 /* match the physical addresses */
3982 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3983 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3984
3985# ifdef IN_RING3
3986 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3987 if (RT_FAILURE(rc))
3988 {
3989 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3990 {
3991 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3992 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3993 cErrors++;
3994 continue;
3995 }
3996 }
3997 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3998 {
3999 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4000 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4001 cErrors++;
4002 continue;
4003 }
4004# endif
4005
4006 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4007 if (!pPhysPage)
4008 {
4009# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4010 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4011 {
4012 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4013 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4014 cErrors++;
4015 continue;
4016 }
4017# endif
4018 if (PteDst.n.u1Write)
4019 {
4020 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4021 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4022 cErrors++;
4023 }
4024 fIgnoreFlags |= X86_PTE_RW;
4025 }
4026 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4027 {
4028 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4029 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4030 cErrors++;
4031 continue;
4032 }
4033
4034 /* flags */
4035 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4036 {
4037 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4038 {
4039 if (PteDst.n.u1Write)
4040 {
4041 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4042 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4043 cErrors++;
4044 continue;
4045 }
4046 fIgnoreFlags |= X86_PTE_RW;
4047 }
4048 else
4049 {
4050 if (PteDst.n.u1Present)
4051 {
4052 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4053 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4054 cErrors++;
4055 continue;
4056 }
4057 fIgnoreFlags |= X86_PTE_P;
4058 }
4059 }
4060 else
4061 {
4062 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4063 {
4064 if (PteDst.n.u1Write)
4065 {
4066 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4067 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4068 cErrors++;
4069 continue;
4070 }
4071 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4072 {
4073 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4074 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4075 cErrors++;
4076 continue;
4077 }
4078 if (PteDst.n.u1Dirty)
4079 {
4080 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4081 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4082 cErrors++;
4083 }
4084# if 0 /** @todo sync access bit properly... */
4085 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4086 {
4087 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4088 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4089 cErrors++;
4090 }
4091 fIgnoreFlags |= X86_PTE_RW;
4092# else
4093 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4094# endif
4095 }
4096 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4097 {
4098 /* access bit emulation (not implemented). */
4099 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4100 {
4101 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4102 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4103 cErrors++;
4104 continue;
4105 }
4106 if (!PteDst.n.u1Accessed)
4107 {
4108 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4109 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4110 cErrors++;
4111 }
4112 fIgnoreFlags |= X86_PTE_P;
4113 }
4114# ifdef DEBUG_sandervl
4115 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4116# endif
4117 }
4118
4119 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4120 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4121 )
4122 {
4123 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4124 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4125 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4126 cErrors++;
4127 continue;
4128 }
4129 } /* foreach PTE */
4130 }
4131 else
4132 {
4133 /*
4134 * Big Page.
4135 */
4136 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4137 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4138 {
4139 if (PdeDst.n.u1Write)
4140 {
4141 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4142 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4143 cErrors++;
4144 continue;
4145 }
4146 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4147 {
4148 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4149 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4150 cErrors++;
4151 continue;
4152 }
4153# if 0 /** @todo sync access bit properly... */
4154 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4155 {
4156 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4157 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4158 cErrors++;
4159 }
4160 fIgnoreFlags |= X86_PTE_RW;
4161# else
4162 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4163# endif
4164 }
4165 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4166 {
4167 /* access bit emulation (not implemented). */
4168 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4169 {
4170 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4171 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4172 cErrors++;
4173 continue;
4174 }
4175 if (!PdeDst.n.u1Accessed)
4176 {
4177 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4178 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4179 cErrors++;
4180 }
4181 fIgnoreFlags |= X86_PTE_P;
4182 }
4183
4184 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4185 {
4186 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4187 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4188 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4189 cErrors++;
4190 }
4191
4192 /* iterate the page table. */
4193 for (unsigned iPT = 0, off = 0;
4194 iPT < RT_ELEMENTS(pPTDst->a);
4195 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4196 {
4197 const SHWPTE PteDst = pPTDst->a[iPT];
4198
4199 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4200 {
4201 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4202 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4203 cErrors++;
4204 }
4205
4206 /* skip not-present entries. */
4207 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4208 continue;
4209
4210 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4211
4212 /* match the physical addresses */
4213 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4214
4215# ifdef IN_RING3
4216 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4217 if (RT_FAILURE(rc))
4218 {
4219 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4220 {
4221 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4222 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4223 cErrors++;
4224 }
4225 }
4226 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4227 {
4228 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4229 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4230 cErrors++;
4231 continue;
4232 }
4233# endif
4234 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4235 if (!pPhysPage)
4236 {
4237# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4238 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4239 {
4240 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4241 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4242 cErrors++;
4243 continue;
4244 }
4245# endif
4246 if (PteDst.n.u1Write)
4247 {
4248 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4249 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4250 cErrors++;
4251 }
4252 fIgnoreFlags |= X86_PTE_RW;
4253 }
4254 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4255 {
4256 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4257 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4258 cErrors++;
4259 continue;
4260 }
4261
4262 /* flags */
4263 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4264 {
4265 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4266 {
4267 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4268 {
4269 if (PteDst.n.u1Write)
4270 {
4271 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4272 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4273 cErrors++;
4274 continue;
4275 }
4276 fIgnoreFlags |= X86_PTE_RW;
4277 }
4278 }
4279 else
4280 {
4281 if (PteDst.n.u1Present)
4282 {
4283 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4284 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4285 cErrors++;
4286 continue;
4287 }
4288 fIgnoreFlags |= X86_PTE_P;
4289 }
4290 }
4291
4292 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4293 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4294 )
4295 {
4296 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4297 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4298 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4299 cErrors++;
4300 continue;
4301 }
4302 } /* for each PTE */
4303 }
4304 }
4305 /* not present */
4306
4307 } /* for each PDE */
4308
4309 } /* for each PDPTE */
4310
4311 } /* for each PML4E */
4312
4313# ifdef DEBUG
4314 if (cErrors)
4315 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4316# endif
4317
4318#endif /* GST == 32BIT, PAE or AMD64 */
4319 return cErrors;
4320
4321#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4322}
4323#endif /* VBOX_STRICT */
4324
4325
4326/**
4327 * Sets up the CR3 for shadow paging
4328 *
4329 * @returns Strict VBox status code.
4330 * @retval VINF_SUCCESS.
4331 *
4332 * @param pVCpu The VMCPU handle.
4333 * @param GCPhysCR3 The physical address in the CR3 register.
4334 */
4335PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4336{
4337 PVM pVM = pVCpu->CTX_SUFF(pVM);
4338
4339 /* Update guest paging info. */
4340#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4341 || PGM_GST_TYPE == PGM_TYPE_PAE \
4342 || PGM_GST_TYPE == PGM_TYPE_AMD64
4343
4344 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4345
4346 /*
4347 * Map the page CR3 points at.
4348 */
4349 RTHCPTR HCPtrGuestCR3;
4350 RTHCPHYS HCPhysGuestCR3;
4351 pgmLock(pVM);
4352 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4353 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4354 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4355 /** @todo this needs some reworking wrt. locking. */
4356# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4357 HCPtrGuestCR3 = NIL_RTHCPTR;
4358 int rc = VINF_SUCCESS;
4359# else
4360 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4361# endif
4362 pgmUnlock(pVM);
4363 if (RT_SUCCESS(rc))
4364 {
4365 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4366 if (RT_SUCCESS(rc))
4367 {
4368# ifdef IN_RC
4369 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4370# endif
4371# if PGM_GST_TYPE == PGM_TYPE_32BIT
4372 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4373# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4374 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4375# endif
4376 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4377
4378# elif PGM_GST_TYPE == PGM_TYPE_PAE
4379 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4380 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4381# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4382 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4383# endif
4384 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4385 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4386
4387 /*
4388 * Map the 4 PDs too.
4389 */
4390 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4391 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4392 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4393 {
4394 if (pGuestPDPT->a[i].n.u1Present)
4395 {
4396 RTHCPTR HCPtr;
4397 RTHCPHYS HCPhys;
4398 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4399 pgmLock(pVM);
4400 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4401 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4402 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4403# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4404 HCPtr = NIL_RTHCPTR;
4405 int rc2 = VINF_SUCCESS;
4406# else
4407 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4408# endif
4409 pgmUnlock(pVM);
4410 if (RT_SUCCESS(rc2))
4411 {
4412 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4413 AssertRCReturn(rc, rc);
4414
4415 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4416# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4417 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4418# endif
4419 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4420 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4421# ifdef IN_RC
4422 PGM_INVL_PG(pVCpu, GCPtr);
4423# endif
4424 continue;
4425 }
4426 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4427 }
4428
4429 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4430# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4431 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4432# endif
4433 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4434 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4435# ifdef IN_RC
4436 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4437# endif
4438 }
4439
4440# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4441 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4442# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4443 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4444# endif
4445# endif
4446 }
4447 else
4448 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4449 }
4450 else
4451 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4452
4453#else /* prot/real stub */
4454 int rc = VINF_SUCCESS;
4455#endif
4456
4457 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4458# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4459 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4460 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4461 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4462 && PGM_GST_TYPE != PGM_TYPE_PROT))
4463
4464 Assert(!HWACCMIsNestedPagingActive(pVM));
4465
4466 /*
4467 * Update the shadow root page as well since that's not fixed.
4468 */
4469 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4470 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4471 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4472 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4473 PPGMPOOLPAGE pNewShwPageCR3;
4474
4475 pgmLock(pVM);
4476
4477# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4478 if (pPool->cDirtyPages)
4479 pgmPoolResetDirtyPages(pVM);
4480# endif
4481
4482 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4483 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4484 AssertFatalRC(rc);
4485 rc = VINF_SUCCESS;
4486
4487# ifdef IN_RC
4488 /*
4489 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4490 * state will be inconsistent! Flush important things now while
4491 * we still can and then make sure there are no ring-3 calls.
4492 */
4493 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4494 VMMRZCallRing3Disable(pVCpu);
4495# endif
4496
4497 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4498 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4499 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4500# ifdef IN_RING0
4501 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4502 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4503# elif defined(IN_RC)
4504 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4505 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4506# else
4507 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4508 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4509# endif
4510
4511# ifndef PGM_WITHOUT_MAPPINGS
4512 /*
4513 * Apply all hypervisor mappings to the new CR3.
4514 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4515 * make sure we check for conflicts in the new CR3 root.
4516 */
4517# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4518 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4519# endif
4520 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4521 AssertRCReturn(rc, rc);
4522# endif
4523
4524 /* Set the current hypervisor CR3. */
4525 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4526 SELMShadowCR3Changed(pVM, pVCpu);
4527
4528# ifdef IN_RC
4529 /* NOTE: The state is consistent again. */
4530 VMMRZCallRing3Enable(pVCpu);
4531# endif
4532
4533 /* Clean up the old CR3 root. */
4534 if ( pOldShwPageCR3
4535 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4536 {
4537 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4538# ifndef PGM_WITHOUT_MAPPINGS
4539 /* Remove the hypervisor mappings from the shadow page table. */
4540 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4541# endif
4542 /* Mark the page as unlocked; allow flushing again. */
4543 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4544
4545 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4546 }
4547 pgmUnlock(pVM);
4548# endif
4549
4550 return rc;
4551}
4552
4553/**
4554 * Unmaps the shadow CR3.
4555 *
4556 * @returns VBox status, no specials.
4557 * @param pVCpu The VMCPU handle.
4558 */
4559PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4560{
4561 LogFlow(("UnmapCR3\n"));
4562
4563 int rc = VINF_SUCCESS;
4564 PVM pVM = pVCpu->CTX_SUFF(pVM);
4565
4566 /*
4567 * Update guest paging info.
4568 */
4569#if PGM_GST_TYPE == PGM_TYPE_32BIT
4570 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4571# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4572 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4573# endif
4574 pVCpu->pgm.s.pGst32BitPdRC = 0;
4575
4576#elif PGM_GST_TYPE == PGM_TYPE_PAE
4577 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4578# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4579 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4580# endif
4581 pVCpu->pgm.s.pGstPaePdptRC = 0;
4582 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4583 {
4584 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4585# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4586 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4587# endif
4588 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4589 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4590 }
4591
4592#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4593 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4594# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4595 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4596# endif
4597
4598#else /* prot/real mode stub */
4599 /* nothing to do */
4600#endif
4601
4602#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4603 /*
4604 * Update shadow paging info.
4605 */
4606# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4607 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4608 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4609
4610# if PGM_GST_TYPE != PGM_TYPE_REAL
4611 Assert(!HWACCMIsNestedPagingActive(pVM));
4612# endif
4613
4614 pgmLock(pVM);
4615
4616# ifndef PGM_WITHOUT_MAPPINGS
4617 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4618 /* Remove the hypervisor mappings from the shadow page table. */
4619 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4620# endif
4621
4622 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4623 {
4624 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4625
4626 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4627
4628# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4629 if (pPool->cDirtyPages)
4630 pgmPoolResetDirtyPages(pVM);
4631# endif
4632
4633 /* Mark the page as unlocked; allow flushing again. */
4634 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4635
4636 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4637 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4638 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4639 pVCpu->pgm.s.pShwPageCR3RC = 0;
4640 pVCpu->pgm.s.iShwUser = 0;
4641 pVCpu->pgm.s.iShwUserTable = 0;
4642 }
4643 pgmUnlock(pVM);
4644# endif
4645#endif /* !IN_RC*/
4646
4647 return rc;
4648}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette