VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@19835

1/* $Id: PGMAllBth.h 19835 2009-05-19 15:20:37Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
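/*
 * Note: This is a template.  It is #included once per guest/shadow paging mode
 * combination; PGM_GST_TYPE and PGM_SHW_TYPE select the modes, and the
 * PGM_BTH_DECL/PGM_BTH_NAME macros expand to mode-specific function names.
 */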
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 *
78 * @param pVCpu VMCPU Handle.
79 * @param uErr The trap error code.
80 * @param pRegFrame Trap register frame.
81 * @param pvFault The fault address.
82 */
83PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
84{
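    /*
     * Note: uErr is the architectural #PF error code.  The bits tested throughout this
     * handler are X86_TRAP_PF_P (set = protection violation, clear = page not present),
     * X86_TRAP_PF_RW (write access), X86_TRAP_PF_US (user-mode access) and
     * X86_TRAP_PF_ID (instruction fetch).
     */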
85 PVM pVM = pVCpu->CTX_SUFF(pVM);
86
87# if defined(IN_RC) && defined(VBOX_STRICT)
88 PGMDynCheckLocks(pVM);
89# endif
90
91# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
92 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
93 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
94
95# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
96 /*
97 * Hide the instruction fetch trap indicator for now.
98 */
99 /** @todo NXE will change this and we must fix NXE in the switcher too! */
100 if (uErr & X86_TRAP_PF_ID)
101 {
102 uErr &= ~X86_TRAP_PF_ID;
103 TRPMSetErrorCode(pVCpu, uErr);
104 }
105# endif
106
107 /*
108 * Get PDs.
109 */
110 int rc;
111# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
112# if PGM_GST_TYPE == PGM_TYPE_32BIT
113 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
114 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
115
116# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
117
118# if PGM_GST_TYPE == PGM_TYPE_PAE
119 unsigned iPDSrc;
120 X86PDPE PdpeSrc;
121 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
122
123# elif PGM_GST_TYPE == PGM_TYPE_AMD64
124 unsigned iPDSrc;
125 PX86PML4E pPml4eSrc;
126 X86PDPE PdpeSrc;
127 PGSTPD pPDSrc;
128
129 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
130 Assert(pPml4eSrc);
131# endif
132
133 /* Quick check for a valid guest trap. (PAE & AMD64) */
134 if (!pPDSrc)
135 {
136# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
137 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# else
139 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
140# endif
141 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
142 TRPMSetErrorCode(pVCpu, uErr);
143 return VINF_EM_RAW_GUEST_TRAP;
144 }
145# endif
146
147# else /* !PGM_WITH_PAGING */
148 PGSTPD pPDSrc = NULL;
149 const unsigned iPDSrc = 0;
150# endif /* !PGM_WITH_PAGING */
151
152 /* Fetch the guest PDE */
153# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
154 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
155# else
156 GSTPDE PdeSrc;
157 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
158 PdeSrc.n.u1Present = 1;
159 PdeSrc.n.u1Write = 1;
160 PdeSrc.n.u1Accessed = 1;
161 PdeSrc.n.u1User = 1;
162# endif
163
164 pgmLock(pVM);
165 { /* Force the shadow pointers to go out of scope after releasing the lock. */
166# if PGM_SHW_TYPE == PGM_TYPE_32BIT
167 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
168 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
169
170# elif PGM_SHW_TYPE == PGM_TYPE_PAE
171 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
172
173 PX86PDPAE pPDDst;
174# if PGM_GST_TYPE != PGM_TYPE_PAE
175 X86PDPE PdpeSrc;
176
177 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
178 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
179# endif
180 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
181 if (rc != VINF_SUCCESS)
182 {
183 pgmUnlock(pVM);
184 AssertRC(rc);
185 return rc;
186 }
187 Assert(pPDDst);
188
189# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
190 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
191 PX86PDPAE pPDDst;
192# if PGM_GST_TYPE == PGM_TYPE_PROT
193 /* AMD-V nested paging */
194 X86PML4E Pml4eSrc;
195 X86PDPE PdpeSrc;
196 PX86PML4E pPml4eSrc = &Pml4eSrc;
197
198 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
199 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
200 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
201# endif
202
203 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
204 if (rc != VINF_SUCCESS)
205 {
206 pgmUnlock(pVM);
207 AssertRC(rc);
208 return rc;
209 }
210 Assert(pPDDst);
211
212# elif PGM_SHW_TYPE == PGM_TYPE_EPT
213 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
214 PEPTPD pPDDst;
215
216 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
217 if (rc != VINF_SUCCESS)
218 {
219 pgmUnlock(pVM);
220 AssertRC(rc);
221 return rc;
222 }
223 Assert(pPDDst);
224# endif
225
226# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
227 /*
228 * If we successfully correct the write protection fault due to dirty bit
229 * tracking, or this page fault is a genuine one, then return immediately.
230 */
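    /* (A dirty-bit fix-up, VINF_PGM_HANDLED_DIRTY_BIT_FAULT, is translated to VINF_SUCCESS
     *  below so the guest simply retries the access; VINF_EM_RAW_GUEST_TRAP is passed on
     *  so the fault is reflected to the guest.) */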
231 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
232 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
233 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
234 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
235 || rc == VINF_EM_RAW_GUEST_TRAP)
236 {
237 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
238 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
239 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
240 pgmUnlock(pVM);
241 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
242 }
243
244 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
245# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
246
247 /*
248 * A common case is the not-present error caused by lazy page table syncing.
249 *
250 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
251 * so we can safely assume that the shadow PT is present when calling SyncPage later.
252 *
253 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
254 * of mapping conflict and defer to SyncCR3 in R3.
255 * (Again, we do NOT support access handlers for non-present guest pages.)
256 *
257 */
258 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
259 && !pPDDst->a[iPDDst].n.u1Present
260 && PdeSrc.n.u1Present
261 )
262 {
263 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
264 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
265 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
266 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
267 pgmUnlock(pVM);
268 if (RT_SUCCESS(rc))
269 {
270 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
271 return rc;
272 }
273 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
274 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
275 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
276 return VINF_PGM_SYNC_CR3;
277 }
278 pgmUnlock(pVM);
279 }
280
281# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
282 /*
283 * Check if this address is within any of our mappings.
284 *
285 * This is *very* fast and it's gonna save us a bit of effort below and prevent
286 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
287 * (BTW, it's impossible to have physical access handlers in a mapping.)
288 */
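    /* ("Mappings" are the hypervisor areas mapped into the guest address space for
     *  raw-mode execution; a fault inside one of them is never an ordinary guest
     *  memory access and gets the special treatment below.) */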
289 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
290 {
291 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
292 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
293 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
294 {
295 if (pvFault < pMapping->GCPtr)
296 break;
297 if (pvFault - pMapping->GCPtr < pMapping->cb)
298 {
299 /*
300 * The first thing we check is if we've got an undetected conflict.
301 */
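                /* (A conflict means the guest has made a PDE present in an area occupied by
                 *  one of our mappings; returning VINF_PGM_SYNC_CR3 forces a full resync in
                 *  ring-3, which is where such conflicts are resolved.) */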
302 if (!pVM->pgm.s.fMappingsFixed)
303 {
304 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
305 while (iPT-- > 0)
306 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
307 {
308 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
309 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
310 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
311 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
312 return VINF_PGM_SYNC_CR3;
313 }
314 }
315
316 /*
317 * Check if the fault address is in a virtual page access handler range.
318 */
319 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
320 if ( pCur
321 && pvFault - pCur->Core.Key < pCur->cb
322 && uErr & X86_TRAP_PF_RW)
323 {
324# ifdef IN_RC
325 STAM_PROFILE_START(&pCur->Stat, h);
326 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
327 STAM_PROFILE_STOP(&pCur->Stat, h);
328# else
329 AssertFailed();
330 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
331# endif
332 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
333 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
334 return rc;
335 }
336
337 /*
338 * Pretend we're not here and let the guest handle the trap.
339 */
340 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
341 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
342 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
343 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
344 return VINF_EM_RAW_GUEST_TRAP;
345 }
346 }
347 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
348 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
349# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
350
351 /*
352 * Check if this fault address is flagged for special treatment,
353 * which means we'll have to figure out the physical address and
354 * check flags associated with it.
355 *
356 * ASSUME that we can limit any special access handling to pages
357 * in page tables which the guest believes to be present.
358 */
359 if (PdeSrc.n.u1Present)
360 {
361 RTGCPHYS GCPhys = NIL_RTGCPHYS;
362
363# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
364# if PGM_GST_TYPE == PGM_TYPE_AMD64
365 bool fBigPagesSupported = true;
366# else
367 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
368# endif
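        /* (PSE is always available in long mode, hence no CR4.PSE check for AMD64 guests.) */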
369 if ( PdeSrc.b.u1Size
370 && fBigPagesSupported)
371 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
372 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
373 else
374 {
375 PGSTPT pPTSrc;
376 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
377 if (RT_SUCCESS(rc))
378 {
379 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
380 if (pPTSrc->a[iPTESrc].n.u1Present)
381 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
382 }
383 }
384# else
385 /* No paging so the fault address is the physical address */
386 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
387# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
388
389 /*
390 * If we have a GC address we'll check if it has any flags set.
391 */
392 if (GCPhys != NIL_RTGCPHYS)
393 {
394 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
395
396 PPGMPAGE pPage;
397 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
398 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
399 {
400 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
401 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
402 {
403 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
404 {
405 /*
406 * Physical page access handler.
407 */
408 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
409 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
410 if (pCur)
411 {
412# ifdef PGM_SYNC_N_PAGES
413 /*
414 * If the region is write protected and we got a page not present fault, then sync
415 * the pages. If the fault was caused by a read, then restart the instruction.
416 * In case of write access continue to the GC write handler.
417 *
418 * ASSUMES that there is only one handler per page or that they have similar write properties.
419 */
420 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
421 && !(uErr & X86_TRAP_PF_P))
422 {
423 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
424 if ( RT_FAILURE(rc)
425 || !(uErr & X86_TRAP_PF_RW)
426 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
427 {
428 AssertRC(rc);
429 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
430 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
431 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
432 return rc;
433 }
434 }
435# endif
436
437 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
438 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
439 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
440
441# if defined(IN_RC) || defined(IN_RING0)
442 if (pCur->CTX_SUFF(pfnHandler))
443 {
444 STAM_PROFILE_START(&pCur->Stat, h);
445 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
446 STAM_PROFILE_STOP(&pCur->Stat, h);
447 }
448 else
449# endif
450 rc = VINF_EM_RAW_EMULATE_INSTR;
451 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
452 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
453 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
454 return rc;
455 }
456 }
457# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
458 else
459 {
460# ifdef PGM_SYNC_N_PAGES
461 /*
462 * If the region is write protected and we got a page not present fault, then sync
463 * the pages. If the fault was caused by a read, then restart the instruction.
464 * In case of write access continue to the GC write handler.
465 */
466 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
467 && !(uErr & X86_TRAP_PF_P))
468 {
469 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
470 if ( RT_FAILURE(rc)
471 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
472 || !(uErr & X86_TRAP_PF_RW))
473 {
474 AssertRC(rc);
475 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
476 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
477 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
478 return rc;
479 }
480 }
481# endif
482 /*
483 * Ok, it's a virtual page access handler.
484 *
485 * Since it's faster to search by address, we'll do that first
486 * and then retry by GCPhys if that fails.
487 */
488 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
489 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
490 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
491 */
492 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
493 if (pCur)
494 {
495 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
496 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
497 || !(uErr & X86_TRAP_PF_P)
498 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
499 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
500
501 if ( pvFault - pCur->Core.Key < pCur->cb
502 && ( uErr & X86_TRAP_PF_RW
503 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
504 {
505# ifdef IN_RC
506 STAM_PROFILE_START(&pCur->Stat, h);
507 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
508 STAM_PROFILE_STOP(&pCur->Stat, h);
509# else
510 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
511# endif
512 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
513 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
514 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
515 return rc;
516 }
517 /* Unhandled part of a monitored page */
518 }
519 else
520 {
521 /* Check by physical address. */
522 PPGMVIRTHANDLER pCur;
523 unsigned iPage;
524 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
525 &pCur, &iPage);
526 Assert(RT_SUCCESS(rc) || !pCur);
527 if ( pCur
528 && ( uErr & X86_TRAP_PF_RW
529 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
530 {
531 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
532# ifdef IN_RC
533 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
534 Assert(off < pCur->cb);
535 STAM_PROFILE_START(&pCur->Stat, h);
536 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
537 STAM_PROFILE_STOP(&pCur->Stat, h);
538# else
539 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
540# endif
541 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
542 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
543 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
544 return rc;
545 }
546 }
547 }
548# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
549
550 /*
551 * There is a handled area of the page, but this fault doesn't belong to it.
552 * We must emulate the instruction.
553 *
554 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
555 * we first check if this was a page-not-present fault for a page with only
556 * write access handlers. If it wasn't a write access, restart the instruction.
557 */
558 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
559
560 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
561 && !(uErr & X86_TRAP_PF_P))
562 {
563 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
564 if ( RT_FAILURE(rc)
565 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
566 || !(uErr & X86_TRAP_PF_RW))
567 {
568 AssertRC(rc);
569 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
570 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
571 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
572 return rc;
573 }
574 }
575
576 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stage of
577 * kernel booting in Ubuntu 6.06 writes to an unhandled part of the LDT page several million times.
578 */
579 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
580 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
581 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
582 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
583 return rc;
584 } /* if any kind of handler */
585
586# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
587 if (uErr & X86_TRAP_PF_P)
588 {
589 /*
590 * The page isn't marked, but it might still be monitored by a virtual page access handler.
591 * (ASSUMES no temporary disabling of virtual handlers.)
592 */
593 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
594 * we should correct both the shadow page table and physical memory flags, and not only check for
595 * accesses within the handler region but for access to pages with virtual handlers. */
596 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
597 if (pCur)
598 {
599 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
600 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
601 || !(uErr & X86_TRAP_PF_P)
602 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
603 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
604
605 if ( pvFault - pCur->Core.Key < pCur->cb
606 && ( uErr & X86_TRAP_PF_RW
607 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
608 {
609# ifdef IN_RC
610 STAM_PROFILE_START(&pCur->Stat, h);
611 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
612 STAM_PROFILE_STOP(&pCur->Stat, h);
613# else
614 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
615# endif
616 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
617 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
618 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
619 return rc;
620 }
621 }
622 }
623# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
624 }
625 else
626 {
627 /*
628 * When the guest accesses invalid physical memory (e.g. probing
629 * of RAM or accessing a remapped MMIO range), then we'll fall
630 * back to the recompiler to emulate the instruction.
631 */
632 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
633 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
634 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
635 return VINF_EM_RAW_EMULATE_INSTR;
636 }
637
638 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
639
640# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
641 /*
642 * We are here only if the page is present in the guest page tables and
643 * the trap is not handled by our handlers.
644 *
645 * Check it for page out-of-sync situation.
646 */
647 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
648
649 if (!(uErr & X86_TRAP_PF_P))
650 {
651 /*
652 * Page is not present in our page tables.
653 * Try to sync it!
654 * BTW, fPageShw is invalid in this branch!
655 */
656 if (uErr & X86_TRAP_PF_US)
657 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
658 else /* supervisor */
659 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
660
661# if defined(LOG_ENABLED) && !defined(IN_RING0)
662 RTGCPHYS GCPhys;
663 uint64_t fPageGst;
664 PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
665 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
666 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
667# endif /* LOG_ENABLED */
668
669# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
670 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
671 {
672 uint64_t fPageGst;
673 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
674 if ( RT_SUCCESS(rc)
675 && !(fPageGst & X86_PTE_US))
676 {
677 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
678 if ( pvFault == (RTGCPTR)pRegFrame->eip
679 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
680# ifdef CSAM_DETECT_NEW_CODE_PAGES
681 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
682 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
683# endif /* CSAM_DETECT_NEW_CODE_PAGES */
684 )
685 {
686 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
687 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
688 if (rc != VINF_SUCCESS)
689 {
690 /*
691 * CSAM needs to perform a job in ring 3.
692 *
693 * Sync the page before going to the host context; otherwise we'll end up in a loop if
694 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
695 */
696 LogFlow(("CSAM ring 3 job\n"));
697 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
698 AssertRC(rc2);
699
700 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
701 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
702 return rc;
703 }
704 }
705# ifdef CSAM_DETECT_NEW_CODE_PAGES
706 else if ( uErr == X86_TRAP_PF_RW
707 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
708 && pRegFrame->ecx < 0x10000)
709 {
710 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
711 * to detect loading of new code pages.
712 */
713
714 /*
715 * Decode the instruction.
716 */
717 RTGCPTR PC;
718 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
719 if (rc == VINF_SUCCESS)
720 {
721 DISCPUSTATE Cpu;
722 uint32_t cbOp;
723 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, &Cpu, &cbOp);
724
725 /* For now we'll restrict this to rep movsw/d instructions */
726 if ( rc == VINF_SUCCESS
727 && Cpu.pCurInstr->opcode == OP_MOVSWD
728 && (Cpu.prefix & PREFIX_REP))
729 {
730 CSAMMarkPossibleCodePage(pVM, pvFault);
731 }
732 }
733 }
734# endif /* CSAM_DETECT_NEW_CODE_PAGES */
735
736 /*
737 * Mark this page as safe.
738 */
739 /** @todo not correct for pages that contain both code and data!! */
740 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
741 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
742 }
743 }
744# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
745 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
746 if (RT_SUCCESS(rc))
747 {
748 /* The page was successfully synced, return to the guest. */
749 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
750 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
751 return VINF_SUCCESS;
752 }
753 }
754 else /* uErr & X86_TRAP_PF_P: */
755 {
756 /*
757 * Write protected pages are made writable when the guest makes the first
758 * write to them. This happens for pages that are shared, write monitored
759 * and not yet allocated.
760 *
761 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
762 * to physically monitored regions that are no longer valid.
763 * Assume for now that this only applies to the read/write flag.
764 */
765 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
766 {
767 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
768 {
769 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
770 GCPhys, pPage, pvFault, uErr));
771 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
772 if (rc != VINF_SUCCESS)
773 {
774 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
775 return rc;
776 }
777 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
778 return VINF_EM_NO_MEMORY;
779 }
780 /// @todo count the above case; else
781 if (uErr & X86_TRAP_PF_US)
782 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
783 else /* supervisor */
784 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
785
786 /*
787 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
788 * page is not present, which is not true in this case.
789 */
790 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
791 if (RT_SUCCESS(rc))
792 {
793 /*
794 * Page was successfully synced, return to guest.
795 */
796# ifdef VBOX_STRICT
797 RTGCPHYS GCPhys;
798 uint64_t fPageGst;
799 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
800 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
801 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
802
803 uint64_t fPageShw;
804 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
805 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
806# endif /* VBOX_STRICT */
807 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
808 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
809 return VINF_SUCCESS;
810 }
811
812 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
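                /* (With CR0.WP clear the guest expects supervisor writes to read-only pages to
                 *  succeed; the shadow PTE mirrors the read-only guest PTE, so the write is
                 *  emulated here instead.) */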
813 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
814 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
815 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
816 {
817 uint64_t fPageGst;
818 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
819 if ( RT_SUCCESS(rc)
820 && !(fPageGst & X86_PTE_RW))
821 {
822 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
823 if (RT_SUCCESS(rc))
824 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
825 else
826 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
827 return rc;
828 }
829 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
830 }
831 }
832
833# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
834# ifdef VBOX_STRICT
835 /*
836 * Check for VMM page flags vs. Guest page flags consistency.
837 * Currently only for debug purposes.
838 */
839 if (RT_SUCCESS(rc))
840 {
841 /* Get guest page flags. */
842 uint64_t fPageGst;
843 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
844 if (RT_SUCCESS(rc))
845 {
846 uint64_t fPageShw;
847 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
848
849 /*
850 * Compare page flags.
851 * Note: we have AVL, A, D bits desynched.
852 */
853 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
854 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
855 }
856 else
857 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
858 }
859 else
860 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
861# endif /* VBOX_STRICT */
862# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
863 }
864 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
865# endif /* PGM_OUT_OF_SYNC_IN_GC */
866 }
867 else /* GCPhys == NIL_RTGCPHYS */
868 {
869 /*
870 * Page not present in Guest OS or invalid page table address.
871 * This is potential virtual page access handler food.
872 *
873 * For the present we'll say that our access handlers don't
874 * work for this case - we've already discarded the page table
875 * not present case which is identical to this.
876 *
877 * When we perchance find we need this, we will probably have AVL
878 * trees (offset based) to operate on and we can measure their speed
879 * against mapping a page table and probably rearrange this handling
880 * a bit. (Like, searching virtual ranges before checking the
881 * physical address.)
882 */
883 }
884 }
885 /* else: !present (guest) */
886
887
888# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
889 /*
890 * Conclusion, this is a guest trap.
891 */
892 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
893 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
894 return VINF_EM_RAW_GUEST_TRAP;
895# else
896 /* present, but not a monitored page; perhaps the guest is probing physical memory */
897 return VINF_EM_RAW_EMULATE_INSTR;
898# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
899
900
901# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
902
903 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
904 return VERR_INTERNAL_ERROR;
905# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
906}
907#endif /* !IN_RING3 */
908
909
910/**
911 * Emulation of the invlpg instruction.
912 *
913 *
914 * @returns VBox status code.
915 *
916 * @param pVCpu The VMCPU handle.
917 * @param GCPtrPage Page to invalidate.
918 *
919 * @remark ASSUMES that the guest is updating before invalidating. This order
920 * isn't required by the CPU, so this is speculative and could cause
921 * trouble.
922 * @remark No TLB shootdown is done on any other VCPU as we assume that
923 * invlpg emulation is the *only* reason for calling this function.
924 * (The guest has to shoot down TLB entries on other CPUs itself)
925 * Currently true, but keep in mind!
926 *
927 * @todo Flush page or page directory only if necessary!
928 * @todo Add a #define for simply invalidating the page.
929 */
930PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
931{
932#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
933 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
934 && PGM_SHW_TYPE != PGM_TYPE_EPT
935 int rc;
936 PVM pVM = pVCpu->CTX_SUFF(pVM);
937 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
938
939 Assert(PGMIsLockOwner(pVM));
940
941 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
942 /*
943 * Get the shadow PD entry and skip out if this PD isn't present.
944 * (Guessing that it is common for a shadow PDE not to be present, we do this first.)
945 */
946# if PGM_SHW_TYPE == PGM_TYPE_32BIT
947 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
948 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
949
950 /* Fetch the pgm pool shadow descriptor. */
951 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
952 Assert(pShwPde);
953
954# elif PGM_SHW_TYPE == PGM_TYPE_PAE
955 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
956 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
957
958 /* If the shadow PDPE isn't present, then skip the invalidate. */
959 if (!pPdptDst->a[iPdpt].n.u1Present)
960 {
961 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
962 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
963 return VINF_SUCCESS;
964 }
965
966 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
967 PPGMPOOLPAGE pShwPde = NULL;
968 PX86PDPAE pPDDst;
969
970 /* Fetch the pgm pool shadow descriptor. */
971 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
972 AssertRCSuccessReturn(rc, rc);
973 Assert(pShwPde);
974
975 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
976 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
977
978# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
979 /* PML4 */
980 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
981 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
982 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
983 PX86PDPAE pPDDst;
984 PX86PDPT pPdptDst;
985 PX86PML4E pPml4eDst;
986 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
987 if (rc != VINF_SUCCESS)
988 {
989 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
990 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
991 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
992 PGM_INVL_VCPU_TLBS(pVCpu);
993 return VINF_SUCCESS;
994 }
995 Assert(pPDDst);
996
997 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
998 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
999
1000 if (!pPdpeDst->n.u1Present)
1001 {
1002 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1003 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
1004 PGM_INVL_VCPU_TLBS(pVCpu);
1005 return VINF_SUCCESS;
1006 }
1007
1008# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1009
1010 const SHWPDE PdeDst = *pPdeDst;
1011 if (!PdeDst.n.u1Present)
1012 {
1013 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1014 return VINF_SUCCESS;
1015 }
1016
1017# if defined(IN_RC)
1018 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1019 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1020# endif
1021
1022 /*
1023 * Get the guest PD entry and calc big page.
1024 */
1025# if PGM_GST_TYPE == PGM_TYPE_32BIT
1026 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1027 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1028 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1029# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1030 unsigned iPDSrc = 0;
1031# if PGM_GST_TYPE == PGM_TYPE_PAE
1032 X86PDPE PdpeSrc;
1033 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1034# else /* AMD64 */
1035 PX86PML4E pPml4eSrc;
1036 X86PDPE PdpeSrc;
1037 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1038# endif
1039 GSTPDE PdeSrc;
1040
1041 if (pPDSrc)
1042 PdeSrc = pPDSrc->a[iPDSrc];
1043 else
1044 PdeSrc.u = 0;
1045# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1046
1047# if PGM_GST_TYPE == PGM_TYPE_AMD64
1048 const bool fIsBigPage = PdeSrc.b.u1Size;
1049# else
1050 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1051# endif
1052
1053# ifdef IN_RING3
1054 /*
1055 * If a CR3 Sync is pending we may ignore the invalidate page operation
1056 * depending on the kind of sync and if it's a global page or not.
1057 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1058 */
1059# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1060 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1061 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1062 && fIsBigPage
1063 && PdeSrc.b.u1Global
1064 )
1065 )
1066# else
1067 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1068# endif
1069 {
1070 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1071 return VINF_SUCCESS;
1072 }
1073# endif /* IN_RING3 */
1074
1075# if PGM_GST_TYPE == PGM_TYPE_AMD64
1076 /* Fetch the pgm pool shadow descriptor. */
1077 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
1078 Assert(pShwPdpt);
1079
1080 /* Fetch the pgm pool shadow descriptor. */
1081 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1082 Assert(pShwPde);
1083
1084 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1085 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1086
1087 if ( !pPml4eSrc->n.u1Present
1088 || pShwPdpt->GCPhys != GCPhysPdpt)
1089 {
1090 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1091 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1092 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1093 ASMAtomicWriteSize(pPml4eDst, 0);
1094 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1095 PGM_INVL_VCPU_TLBS(pVCpu);
1096 return VINF_SUCCESS;
1097 }
1098 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1099 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1100 {
1101 /*
1102 * Mark not present so we can resync the PML4E when it's used.
1103 */
1104 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1105 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1106 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1107 ASMAtomicWriteSize(pPml4eDst, 0);
1108 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1109 PGM_INVL_VCPU_TLBS(pVCpu);
1110 }
1111 else if (!pPml4eSrc->n.u1Accessed)
1112 {
1113 /*
1114 * Mark not present so we can set the accessed bit.
1115 */
1116 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1117 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1118 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1119 ASMAtomicWriteSize(pPml4eDst, 0);
1120 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1121 PGM_INVL_VCPU_TLBS(pVCpu);
1122 }
1123
1124 /* Check if the PDPT entry has changed. */
1125 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1126 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1127 if ( !PdpeSrc.n.u1Present
1128 || pShwPde->GCPhys != GCPhysPd)
1129 {
1130 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1131 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1132 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1133 ASMAtomicWriteSize(pPdpeDst, 0);
1134 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1135 PGM_INVL_VCPU_TLBS(pVCpu);
1136 return VINF_SUCCESS;
1137 }
1138 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1139 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1140 {
1141 /*
1142 * Mark not present so we can resync the PDPTE when it's used.
1143 */
1144 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1145 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1146 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1147 ASMAtomicWriteSize(pPdpeDst, 0);
1148 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1149 PGM_INVL_VCPU_TLBS(pVCpu);
1150 }
1151 else if (!PdpeSrc.lm.u1Accessed)
1152 {
1153 /*
1154 * Mark not present so we can set the accessed bit.
1155 */
1156 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1157 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1158 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1159 ASMAtomicWriteSize(pPdpeDst, 0);
1160 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1161 PGM_INVL_VCPU_TLBS(pVCpu);
1162 }
1163# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1164
1165 /*
1166 * Deal with the Guest PDE.
1167 */
1168 rc = VINF_SUCCESS;
1169 if (PdeSrc.n.u1Present)
1170 {
1171 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1172 {
1173 /*
1174 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1175 */
1176 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1177 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1178 pgmLock(pVM);
1179 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1180 pgmUnlock(pVM);
1181 }
1182 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1183 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1184 {
1185 /*
1186 * Mark not present so we can resync the PDE when it's used.
1187 */
1188 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1189 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1190 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1191 ASMAtomicWriteSize(pPdeDst, 0);
1192 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1193 PGM_INVL_VCPU_TLBS(pVCpu);
1194 }
1195 else if (!PdeSrc.n.u1Accessed)
1196 {
1197 /*
1198 * Mark not present so we can set the accessed bit.
1199 */
1200 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1201 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1202 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1203 ASMAtomicWriteSize(pPdeDst, 0);
1204 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1205 PGM_INVL_VCPU_TLBS(pVCpu);
1206 }
1207 else if (!fIsBigPage)
1208 {
1209 /*
1210 * 4KB - page.
1211 */
1212 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1213 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1214# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1215 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1216 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1217# endif
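            /* (A 32-bit guest page table covers 4MB while a PAE shadow page table only covers
             *  2MB, so each guest PT is backed by two shadow PTs; the low bit of the shadow PD
             *  index selects which half of the guest PT this shadow PT mirrors.) */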
1218 if (pShwPage->GCPhys == GCPhys)
1219 {
1220# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1221 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1222 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1223 if (pPT->a[iPTEDst].n.u1Present)
1224 {
1225# ifdef PGMPOOL_WITH_USER_TRACKING
1226 /* This is very unlikely with caching/monitoring enabled. */
1227 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1228# endif
1229 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1230 }
1231# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1232 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1233 if (RT_SUCCESS(rc))
1234 rc = VINF_SUCCESS;
1235# endif
1236 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1237 PGM_INVL_PG(pVCpu, GCPtrPage);
1238 }
1239 else
1240 {
1241 /*
1242 * The page table address changed.
1243 */
1244 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1245 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1246 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1247 ASMAtomicWriteSize(pPdeDst, 0);
1248 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1249 PGM_INVL_VCPU_TLBS(pVCpu);
1250 }
1251 }
1252 else
1253 {
1254 /*
1255 * 2/4MB - page.
1256 */
1257 /* Before freeing the page, check if anything really changed. */
1258 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1259 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1260# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1261 /* Select the right PDE as we're emulating a 4MB page directory entry with two 2MB shadow PDEs. */
1262 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1263# endif
1264 if ( pShwPage->GCPhys == GCPhys
1265 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1266 {
1267 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1268 /** @todo PAT */
1269 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1270 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1271 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1272 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1273 {
1274 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1276# if defined(IN_RC)
1277 /* Release the dynamic mapping lock on pPdeDst again. */
1278 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1279# endif
1280 return VINF_SUCCESS;
1281 }
1282 }
1283
1284 /*
1285 * Ok, the page table is present and it's been changed in the guest.
1286 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1287 * We could do this for some flushes in GC too, but we need an algorithm for
1288 * deciding which 4MB pages contain code that is likely to be executed very soon.
1289 */
1290 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1291 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1292 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1293 ASMAtomicWriteSize(pPdeDst, 0);
1294 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1295 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1296 }
1297 }
1298 else
1299 {
1300 /*
1301 * Page directory is not present, mark shadow PDE not present.
1302 */
1303 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1304 {
1305 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1306 ASMAtomicWriteSize(pPdeDst, 0);
1307 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1308 PGM_INVL_PG(pVCpu, GCPtrPage);
1309 }
1310 else
1311 {
1312 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1313 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1314 }
1315 }
1316# if defined(IN_RC)
1317 /* Release the dynamic mapping lock on pPdeDst again. */
1318 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1319# endif
1320 return rc;
1321
1322#else /* guest real and protected mode */
1323 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1324 return VINF_SUCCESS;
1325#endif
1326}
1327
1328
1329#ifdef PGMPOOL_WITH_USER_TRACKING
1330/**
1331 * Update the tracking of shadowed pages.
1332 *
1333 * @param pVCpu The VMCPU handle.
1334 * @param pShwPage The shadow page.
1335 * @param HCPhys The physical page that is being dereferenced.
1336 */
1337DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1338{
1339# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1340 PVM pVM = pVCpu->CTX_SUFF(pVM);
1341
1342 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1343 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1344
1345 /** @todo If this turns out to be a bottleneck (*very* likely), two things can be done:
1346 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1347 * 2. write protect all shadowed pages. I.e. implement caching.
1348 */
1349 /*
1350 * Find the guest address.
1351 */
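    /* Brute-force reverse lookup: walk every RAM range and compare the host physical
     * address of each page.  This linear scan is the bottleneck the @todo above refers to. */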
1352 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1353 pRam;
1354 pRam = pRam->CTX_SUFF(pNext))
1355 {
1356 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1357 while (iPage-- > 0)
1358 {
1359 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1360 {
1361 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1362 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1363 pShwPage->cPresent--;
1364 pPool->cPresent--;
1365 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1366 return;
1367 }
1368 }
1369 }
1370
1371 for (;;)
1372 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1373# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1374 pShwPage->cPresent--;
1375 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1376# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1377}
1378
1379
1380/**
1381 * Update the tracking of shadowed pages.
1382 *
1383 * @param pVCpu The VMCPU handle.
1384 * @param pShwPage The shadow page.
1385 * @param u16 The top 16 bits of the pPage->HCPhys.
1386 * @param pPage Pointer to the guest page. This will be modified.
1387 * @param iPTDst The index into the shadow table.
1388 */
1389DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1390{
1391 PVM pVM = pVCpu->CTX_SUFF(pVM);
1392# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1393 /*
1394 * Just deal with the simple first time here.
1395 */
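    /* (For the very first reference the tracking word is built directly from cRefs=1 and
     *  the shadow page index; any additional references are delegated to
     *  pgmPoolTrackPhysExtAddref below.) */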
1396 if (!u16)
1397 {
1398 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1399 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1400 }
1401 else
1402 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1403
1404 /* write back */
1405 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1406 PGM_PAGE_SET_TRACKING(pPage, u16);
1407
1408# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1409
1410 /* update statistics. */
1411 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1412 pShwPage->cPresent++;
1413 if (pShwPage->iFirstPresent > iPTDst)
1414 pShwPage->iFirstPresent = iPTDst;
1415}
1416#endif /* PGMPOOL_WITH_USER_TRACKING */
1417
1418
1419/**
1420 * Creates a 4K shadow page for a guest page.
1421 *
1422 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1423 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1424 * will be mapped in this function.
1425 *
1426 * @param pVCpu The VMCPU handle.
1427 * @param pPteDst Destination page table entry.
1428 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1429 * Can safely assume that only the flags are being used.
1430 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1431 * @param pShwPage Pointer to the shadow page.
1432 * @param iPTDst The index into the shadow table.
1433 *
1434 * @remark Not used for 2/4MB pages!
1435 */
1436DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1437{
1438 if (PteSrc.n.u1Present)
1439 {
1440 PVM pVM = pVCpu->CTX_SUFF(pVM);
1441
1442 /*
1443 * Find the ram range.
1444 */
1445 PPGMPAGE pPage;
1446 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1447 if (RT_SUCCESS(rc))
1448 {
1449#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1450 /* Try make the page writable if necessary. */
1451 if ( PteSrc.n.u1Write
1452 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1453 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1454 {
1455 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1456 AssertRC(rc);
1457 }
1458#endif
1459
1460 /** @todo investigate PWT, PCD and PAT. */
1461 /*
1462 * Make page table entry.
1463 */
1464 SHWPTE PteDst;
1465 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1466 {
1467 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1468 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1469 {
1470#if PGM_SHW_TYPE == PGM_TYPE_EPT
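 /* EPT shadow PTE: present and executable but not writable, so writes still hit the access handler. */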
1471 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1472 PteDst.n.u1Present = 1;
1473 PteDst.n.u1Execute = 1;
1474 PteDst.n.u1IgnorePAT = 1;
1475 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1476 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1477#else
1478 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1479 | PGM_PAGE_GET_HCPHYS(pPage);
1480#endif
1481 }
1482 else
1483 {
1484 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1485 PteDst.u = 0;
1486 }
1487 /** @todo count these two kinds. */
1488 }
1489 else
1490 {
1491#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1492 /*
1493 * If the page or page directory entry is not marked accessed,
1494 * we mark the page not present.
1495 */
1496 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1497 {
1498 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1499 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1500 PteDst.u = 0;
1501 }
1502 else
1503 /*
1504 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1505 * when the page is modified.
1506 */
1507 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1508 {
1509 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1510 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1511 | PGM_PAGE_GET_HCPHYS(pPage)
1512 | PGM_PTFLAGS_TRACK_DIRTY;
1513 }
1514 else
1515#endif
1516 {
1517 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1518#if PGM_SHW_TYPE == PGM_TYPE_EPT
1519 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1520 PteDst.n.u1Present = 1;
1521 PteDst.n.u1Write = 1;
1522 PteDst.n.u1Execute = 1;
1523 PteDst.n.u1IgnorePAT = 1;
1524 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1525 /* PteDst.n.u1Size = 0 */
1526#else
1527 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1528 | PGM_PAGE_GET_HCPHYS(pPage);
1529#endif
1530 }
1531 }
1532
1533 /*
1534 * Make sure only allocated pages are mapped writable.
1535 */
1536 if ( PteDst.n.u1Write
1537 && PteDst.n.u1Present
1538 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1539 {
1540 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1541 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1542 }
1543
1544#ifdef PGMPOOL_WITH_USER_TRACKING
1545 /*
1546 * Keep user track up to date.
1547 */
1548 if (PteDst.n.u1Present)
1549 {
1550 if (!pPteDst->n.u1Present)
1551 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1552 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1553 {
1554 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1555 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1556 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1557 }
1558 }
1559 else if (pPteDst->n.u1Present)
1560 {
1561 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1562 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1563 }
1564#endif /* PGMPOOL_WITH_USER_TRACKING */
1565
1566 /*
1567 * Update statistics and commit the entry.
1568 */
1569#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1570 if (!PteSrc.n.u1Global)
1571 pShwPage->fSeenNonGlobal = true;
1572#endif
1573 ASMAtomicWriteSize(pPteDst, PteDst.u);
1574 }
1575 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1576 /** @todo count these. */
1577 }
1578 else
1579 {
1580 /*
1581 * Page not-present.
1582 */
1583 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1584#ifdef PGMPOOL_WITH_USER_TRACKING
1585 /* Keep user track up to date. */
1586 if (pPteDst->n.u1Present)
1587 {
1588 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1589 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1590 }
1591#endif /* PGMPOOL_WITH_USER_TRACKING */
1592 ASMAtomicWriteSize(pPteDst, 0);
1593 /** @todo count these. */
1594 }
1595}
1596
1597
1598/**
1599 * Syncs a guest OS page.
1600 *
1601 * There are no conflicts at this point, nor is there any need for
1602 * page table allocations.
1603 *
1604 * @returns VBox status code.
1605 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1606 * @param pVCpu The VMCPU handle.
1607 * @param PdeSrc Page directory entry of the guest.
1608 * @param GCPtrPage Guest context page address.
1609 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1610 * @param uErr Fault error (X86_TRAP_PF_*).
1611 */
1612PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1613{
1614 PVM pVM = pVCpu->CTX_SUFF(pVM);
1615 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1616 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1617
1618#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1619 || PGM_GST_TYPE == PGM_TYPE_PAE \
1620 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1621 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1622 && PGM_SHW_TYPE != PGM_TYPE_EPT
1623
1624# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1625 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
1626# endif
1627
1628 /*
1629 * Assert preconditions.
1630 */
1631 Assert(PdeSrc.n.u1Present);
1632 Assert(cPages);
1633 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1634
1635 /*
1636 * Get the shadow PDE, find the shadow page table in the pool.
1637 */
1638# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1639 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1640 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1641
1642 /* Fetch the pgm pool shadow descriptor. */
1643 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1644 Assert(pShwPde);
1645
1646# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1647 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1648 PPGMPOOLPAGE pShwPde = NULL;
1649 PX86PDPAE pPDDst;
1650
1651 /* Fetch the pgm pool shadow descriptor. */
1652 int rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1653 AssertRCSuccessReturn(rc, rc);
1654 Assert(pShwPde);
1655
1656 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1657 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1658
1659# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1660 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1661 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1662 PX86PDPAE pPDDst;
1663 PX86PDPT pPdptDst;
1664
1665 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1666 AssertRCSuccessReturn(rc, rc);
1667 Assert(pPDDst && pPdptDst);
1668 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1669# endif
1670 SHWPDE PdeDst = *pPdeDst;
1671 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1672 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1673
1674# if PGM_GST_TYPE == PGM_TYPE_AMD64
1675 /* Fetch the pgm pool shadow descriptor. */
1676 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1677 Assert(pShwPde);
1678# endif
1679
1680# if defined(IN_RC)
1681 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1682 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1683# endif
1684
1685 /*
1686 * Check that the page is present and that the shadow PDE isn't out of sync.
1687 */
1688# if PGM_GST_TYPE == PGM_TYPE_AMD64
1689 const bool fBigPage = PdeSrc.b.u1Size;
1690# else
1691 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1692# endif
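 /* Compute the guest physical address this shadow page table is expected to map, so it can be compared with the cached pShwPage->GCPhys below. */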
1693 RTGCPHYS GCPhys;
1694 if (!fBigPage)
1695 {
1696 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1697# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1698 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1699 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1700# endif
1701 }
1702 else
1703 {
1704 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1705# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1706 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1707 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1708# endif
1709 }
1710 if ( pShwPage->GCPhys == GCPhys
1711 && PdeSrc.n.u1Present
1712 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1713 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1714# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1715 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1716# endif
1717 )
1718 {
1719 /*
1720 * Check that the PDE is marked accessed already.
1721 * Since we set the accessed bit *before* getting here on a #PF, this
1722 * check is only meant for dealing with non-#PF'ing paths.
1723 */
1724 if (PdeSrc.n.u1Accessed)
1725 {
1726 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1727 if (!fBigPage)
1728 {
1729 /*
1730 * 4KB Page - Map the guest page table.
1731 */
1732 PGSTPT pPTSrc;
1733 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1734 if (RT_SUCCESS(rc))
1735 {
1736# ifdef PGM_SYNC_N_PAGES
1737 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1738 if ( cPages > 1
1739 && !(uErr & X86_TRAP_PF_P)
1740 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1741 {
1742 /*
1743 * This code path is currently only taken when the caller is PGMTrap0eHandler
1744 * for non-present pages!
1745 *
1746 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1747 * deal with locality.
1748 */
1749 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1750# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1751 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1752 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1753# else
1754 const unsigned offPTSrc = 0;
1755# endif
1756 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1757 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1758 iPTDst = 0;
1759 else
1760 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1761 for (; iPTDst < iPTDstEnd; iPTDst++)
1762 {
1763 if (!pPTDst->a[iPTDst].n.u1Present)
1764 {
1765 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1766 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1767 NOREF(GCPtrCurPage);
1768#ifndef IN_RING0
1769 /*
1770 * Assume kernel code is marked as supervisor (not user level), is executed
1771 * using a conforming code selector, and is marked read-only.
1772 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1773 */
1774 PPGMPAGE pPage;
1775 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1776 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1777 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1778 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1779 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1780 )
1781#endif /* else: CSAM not active */
1782 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1783 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1784 GCPtrCurPage, PteSrc.n.u1Present,
1785 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1786 PteSrc.n.u1User & PdeSrc.n.u1User,
1787 (uint64_t)PteSrc.u,
1788 (uint64_t)pPTDst->a[iPTDst].u,
1789 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1790 }
1791 }
1792 }
1793 else
1794# endif /* PGM_SYNC_N_PAGES */
1795 {
1796 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1797 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1798 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1799 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1800 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1801 GCPtrPage, PteSrc.n.u1Present,
1802 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1803 PteSrc.n.u1User & PdeSrc.n.u1User,
1804 (uint64_t)PteSrc.u,
1805 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1806 }
1807 }
1808 else /* MMIO or invalid page: emulated in #PF handler. */
1809 {
1810 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1811 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1812 }
1813 }
1814 else
1815 {
1816 /*
1817 * 4/2MB page - lazy syncing shadow 4K pages.
1818 * (There are many causes of getting here, it's no longer only CSAM.)
1819 */
1820 /* Calculate the GC physical address of this 4KB shadow page. */
1821 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1822 /* Find ram range. */
1823 PPGMPAGE pPage;
1824 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1825 if (RT_SUCCESS(rc))
1826 {
1827# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1828 /* Try to make the page writable if necessary. */
1829 if ( PdeSrc.n.u1Write
1830 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1831 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1832 {
1833 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1834 AssertRC(rc);
1835 }
1836# endif
1837
1838 /*
1839 * Make shadow PTE entry.
1840 */
1841 SHWPTE PteDst;
1842 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1843 | PGM_PAGE_GET_HCPHYS(pPage);
1844 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1845 {
1846 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1847 PteDst.n.u1Write = 0;
1848 else
1849 PteDst.u = 0;
1850 }
1851 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1852# ifdef PGMPOOL_WITH_USER_TRACKING
1853 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1854 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1855# endif
1856 /* Make sure only allocated pages are mapped writable. */
1857 if ( PteDst.n.u1Write
1858 && PteDst.n.u1Present
1859 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1860 {
1861 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1862 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1863 }
1864
1865 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1866
1867 /*
1868 * If the page is not flagged as dirty and is writable, then make it read-only
1869 * at PD level, so we can set the dirty bit when the page is modified.
1870 *
1871 * ASSUMES that page access handlers are implemented on page table entry level.
1872 * Thus we will first catch the dirty access and set PDE.D and restart. If
1873 * there is an access handler, we'll trap again and let it work on the problem.
1874 */
1875 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1876 * As for invlpg, it simply frees the whole shadow PT.
1877 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1878 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1879 {
1880 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1881 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1882 PdeDst.n.u1Write = 0;
1883 }
1884 else
1885 {
1886 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1887 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1888 }
1889 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1890 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1891 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1892 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1893 }
1894 else
1895 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1896 }
1897# if defined(IN_RC)
1898 /* Release the lock on the dynamic pPdeDst mapping taken above. */
1899 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1900# endif
1901 return VINF_SUCCESS;
1902 }
1903 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1904 }
1905 else
1906 {
1907 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1908 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1909 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1910 }
1911
1912 /*
1913 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1914 * Yea, I'm lazy.
1915 */
1916 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1917 ASMAtomicWriteSize(pPdeDst, 0);
1918
1919# if defined(IN_RC)
1920 /* Release the lock on the dynamic pPdeDst mapping taken above. */
1921 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1922# endif
1923 PGM_INVL_VCPU_TLBS(pVCpu);
1924 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1925
1926#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1927 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1928 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1929 && !defined(IN_RC)
1930
1931# ifdef PGM_SYNC_N_PAGES
1932 /*
1933 * Get the shadow PDE, find the shadow page table in the pool.
1934 */
1935# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1936 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1937
1938# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1939 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1940
1941# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1942 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1943 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1944 PX86PDPAE pPDDst;
1945 X86PDEPAE PdeDst;
1946 PX86PDPT pPdptDst;
1947
1948 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1949 AssertRCSuccessReturn(rc, rc);
1950 Assert(pPDDst && pPdptDst);
1951 PdeDst = pPDDst->a[iPDDst];
1952# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1953 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1954 PEPTPD pPDDst;
1955 EPTPDE PdeDst;
1956
1957 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1958 if (rc != VINF_SUCCESS)
1959 {
1960 AssertRC(rc);
1961 return rc;
1962 }
1963 Assert(pPDDst);
1964 PdeDst = pPDDst->a[iPDDst];
1965# endif
1966 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
1967 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1968 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1969
1970 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1971 if ( cPages > 1
1972 && !(uErr & X86_TRAP_PF_P)
1973 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1974 {
1975 /*
1976 * This code path is currently only taken when the caller is PGMTrap0eHandler
1977 * for non-present pages!
1978 *
1979 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1980 * deal with locality.
1981 */
1982 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1983 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1984 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1985 iPTDst = 0;
1986 else
1987 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1988 for (; iPTDst < iPTDstEnd; iPTDst++)
1989 {
1990 if (!pPTDst->a[iPTDst].n.u1Present)
1991 {
1992 GSTPTE PteSrc;
1993
1994 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1995
1996 /* Fake the page table entry */
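 /* In real/protected mode without paging the guest address equals the guest physical address, so GCPtrCurPage doubles as the page frame. */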
1997 PteSrc.u = GCPtrCurPage;
1998 PteSrc.n.u1Present = 1;
1999 PteSrc.n.u1Dirty = 1;
2000 PteSrc.n.u1Accessed = 1;
2001 PteSrc.n.u1Write = 1;
2002 PteSrc.n.u1User = 1;
2003
2004 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2005
2006 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2007 GCPtrCurPage, PteSrc.n.u1Present,
2008 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2009 PteSrc.n.u1User & PdeSrc.n.u1User,
2010 (uint64_t)PteSrc.u,
2011 (uint64_t)pPTDst->a[iPTDst].u,
2012 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2013
2014 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2015 break;
2016 }
2017 else
2018 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2019 }
2020 }
2021 else
2022# endif /* PGM_SYNC_N_PAGES */
2023 {
2024 GSTPTE PteSrc;
2025 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2026 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2027
2028 /* Fake the page table entry */
2029 PteSrc.u = GCPtrCurPage;
2030 PteSrc.n.u1Present = 1;
2031 PteSrc.n.u1Dirty = 1;
2032 PteSrc.n.u1Accessed = 1;
2033 PteSrc.n.u1Write = 1;
2034 PteSrc.n.u1User = 1;
2035 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2036
2037 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2038 GCPtrPage, PteSrc.n.u1Present,
2039 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2040 PteSrc.n.u1User & PdeSrc.n.u1User,
2041 (uint64_t)PteSrc.u,
2042 (uint64_t)pPTDst->a[iPTDst].u,
2043 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2044 }
2045 return VINF_SUCCESS;
2046
2047#else
2048 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2049 return VERR_INTERNAL_ERROR;
2050#endif
2051}
2052
2053
2054#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2055/**
2056 * Investigate page fault and handle write protection page faults caused by
2057 * dirty bit tracking.
2058 *
2059 * @returns VBox status code.
2060 * @param pVCpu The VMCPU handle.
2061 * @param uErr Page fault error code.
2062 * @param pPdeDst Shadow page directory entry.
2063 * @param pPdeSrc Guest page directory entry.
2064 * @param GCPtrPage Guest context page address.
2065 */
2066PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2067{
2068 bool fWriteProtect = !!(CPUMGetGuestCR0(pVCpu) & X86_CR0_WP);
2069 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2070 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2071# if PGM_GST_TYPE == PGM_TYPE_AMD64
2072 bool fBigPagesSupported = true;
2073# else
2074 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2075# endif
2076# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2077 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
2078# endif
2079 unsigned uPageFaultLevel;
2080 int rc;
2081 PVM pVM = pVCpu->CTX_SUFF(pVM);
2082 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2083
2084 Assert(PGMIsLockOwner(pVM));
2085
2086 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2087 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2088
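 /* Walk the guest hierarchy top down (PML4E, PDPE, PDE, then PTE); a genuine protection violation at any upper level takes the l_UpperLevelPageFault exit below. */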
2089# if PGM_GST_TYPE == PGM_TYPE_PAE \
2090 || PGM_GST_TYPE == PGM_TYPE_AMD64
2091
2092# if PGM_GST_TYPE == PGM_TYPE_AMD64
2093 PX86PML4E pPml4eSrc;
2094 PX86PDPE pPdpeSrc;
2095
2096 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2097 Assert(pPml4eSrc);
2098
2099 /*
2100 * Real page fault? (PML4E level)
2101 */
2102 if ( (uErr & X86_TRAP_PF_RSVD)
2103 || !pPml4eSrc->n.u1Present
2104 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2105 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2106 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2107 )
2108 {
2109 uPageFaultLevel = 0;
2110 goto l_UpperLevelPageFault;
2111 }
2112 Assert(pPdpeSrc);
2113
2114# else /* PAE */
2115 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2116# endif /* PAE */
2117
2118 /*
2119 * Real page fault? (PDPE level)
2120 */
2121 if ( (uErr & X86_TRAP_PF_RSVD)
2122 || !pPdpeSrc->n.u1Present
2123# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2124 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2125 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2126 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2127# endif
2128 )
2129 {
2130 uPageFaultLevel = 1;
2131 goto l_UpperLevelPageFault;
2132 }
2133# endif
2134
2135 /*
2136 * Real page fault? (PDE level)
2137 */
2138 if ( (uErr & X86_TRAP_PF_RSVD)
2139 || !pPdeSrc->n.u1Present
2140# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2141 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2142# endif
2143 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2144 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2145 {
2146 uPageFaultLevel = 2;
2147 goto l_UpperLevelPageFault;
2148 }
2149
2150 /*
2151 * First check the easy case where the page directory has been marked read-only to track
2152 * the dirty bit of an emulated BIG page.
2153 */
2154 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2155 {
2156 /* Mark guest page directory as accessed */
2157# if PGM_GST_TYPE == PGM_TYPE_AMD64
2158 pPml4eSrc->n.u1Accessed = 1;
2159 pPdpeSrc->lm.u1Accessed = 1;
2160# endif
2161 pPdeSrc->b.u1Accessed = 1;
2162
2163 /*
2164 * Only write protection page faults are relevant here.
2165 */
2166 if (fWriteFault)
2167 {
2168 /* Mark guest page directory as dirty (BIG page only). */
2169 pPdeSrc->b.u1Dirty = 1;
2170
2171 if (pPdeDst->n.u1Present)
2172 {
2173 if (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY)
2174 {
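 /* The shadow PDE was write-protected only to catch the first write for dirty tracking; restore write access and drop the marker now that the guest D bit is set. */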
2175 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2176 Assert(pPdeSrc->b.u1Write);
2177
2178 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2179 * fault again and take this path to only invalidate the entry.
2180 */
2181 pPdeDst->n.u1Write = 1;
2182 pPdeDst->n.u1Accessed = 1;
2183 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2184 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2185 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2186 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2187 }
2188# ifdef IN_RING0
2189 else
2190 /* Check for stale TLB entry; only applies to the SMP guest case. */
2191 if ( pVM->cCPUs > 1
2192 && pPdeDst->n.u1Write
2193 && pPdeDst->n.u1Accessed)
2194 {
2195 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2196 if (pShwPage)
2197 {
2198 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2199 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2200 if ( pPteDst->n.u1Present
2201 && pPteDst->n.u1Write)
2202 {
2203 /* Stale TLB entry. */
2204 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2205 PGM_INVL_PG(pVCpu, GCPtrPage);
2206
2207 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2208 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2209 }
2210 }
2211 }
2212# endif /* IN_RING0 */
2213 }
2214 }
2215 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2216 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2217 }
2218 /* else: 4KB page table */
2219
2220 /*
2221 * Map the guest page table.
2222 */
2223 PGSTPT pPTSrc;
2224 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2225 if (RT_SUCCESS(rc))
2226 {
2227 /*
2228 * Real page fault?
2229 */
2230 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2231 const GSTPTE PteSrc = *pPteSrc;
2232 if ( !PteSrc.n.u1Present
2233# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2234 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2235# endif
2236 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2237 || (fUserLevelFault && !PteSrc.n.u1User)
2238 )
2239 {
2240 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2241 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2242 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2243
2244 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2245 * See the 2nd case above as well.
2246 */
2247 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2248 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2249
2250 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2251 return VINF_EM_RAW_GUEST_TRAP;
2252 }
2253 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2254
2255 /*
2256 * Set the accessed bits in the page directory and the page table.
2257 */
2258# if PGM_GST_TYPE == PGM_TYPE_AMD64
2259 pPml4eSrc->n.u1Accessed = 1;
2260 pPdpeSrc->lm.u1Accessed = 1;
2261# endif
2262 pPdeSrc->n.u1Accessed = 1;
2263 pPteSrc->n.u1Accessed = 1;
2264
2265 /*
2266 * Only write protection page faults are relevant here.
2267 */
2268 if (fWriteFault)
2269 {
2270 /* Write access, so mark guest entry as dirty. */
2271# ifdef VBOX_WITH_STATISTICS
2272 if (!pPteSrc->n.u1Dirty)
2273 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2274 else
2275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2276# endif
2277
2278 pPteSrc->n.u1Dirty = 1;
2279
2280 if (pPdeDst->n.u1Present)
2281 {
2282#ifndef IN_RING0
2283 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2284 * Our individual shadow handlers will provide more information and force a fatal exit.
2285 */
2286 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2287 {
2288 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2289 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2290 return VINF_SUCCESS;
2291 }
2292#endif
2293 /*
2294 * Map shadow page table.
2295 */
2296 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2297 if (pShwPage)
2298 {
2299 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2300 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2301 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2302 {
2303 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2304 {
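 /* Same trick at PTE level: the write protection existed only for dirty tracking, so set the shadow A/D bits, re-enable write access and clear the marker. */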
2305 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2306# ifdef VBOX_STRICT
2307 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2308 if (pPage)
2309 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2310 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2311# endif
2312 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2313
2314 Assert(pPteSrc->n.u1Write);
2315
2316 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2317 * fault again and take this path to only invalidate the entry.
2318 */
2319 pPteDst->n.u1Write = 1;
2320 pPteDst->n.u1Dirty = 1;
2321 pPteDst->n.u1Accessed = 1;
2322 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2323 PGM_INVL_PG(pVCpu, GCPtrPage);
2324
2325 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2326 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2327 }
2328# ifdef IN_RING0
2329 else
2330 /* Check for stale TLB entry; only applies to the SMP guest case. */
2331 if ( pVM->cCPUs > 1
2332 && pPteDst->n.u1Write == 1
2333 && pPteDst->n.u1Accessed == 1)
2334 {
2335 /* Stale TLB entry. */
2336 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2337 PGM_INVL_PG(pVCpu, GCPtrPage);
2338
2339 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2340 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2341 }
2342# endif
2343 }
2344 }
2345 else
2346 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2347 }
2348 }
2349/** @todo Optimize accessed bit emulation? */
2350# ifdef VBOX_STRICT
2351 /*
2352 * Sanity check.
2353 */
2354 else if ( !pPteSrc->n.u1Dirty
2355 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2356 && pPdeDst->n.u1Present)
2357 {
2358 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2359 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2360 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2361 if ( pPteDst->n.u1Present
2362 && pPteDst->n.u1Write)
2363 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2364 }
2365# endif /* VBOX_STRICT */
2366 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2367 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2368 }
2369 AssertRC(rc);
2370 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2371 return rc;
2372
2373
2374l_UpperLevelPageFault:
2375 /*
2376 * Pagefault detected while checking the PML4E, PDPE or PDE.
2377 * Single exit handler to get rid of duplicate code paths.
2378 */
2379 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2380 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2381 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2382
2383 if (
2384# if PGM_GST_TYPE == PGM_TYPE_AMD64
2385 pPml4eSrc->n.u1Present &&
2386# endif
2387# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2388 pPdpeSrc->n.u1Present &&
2389# endif
2390 pPdeSrc->n.u1Present)
2391 {
2392 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2393 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2394 {
2395 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2396 }
2397 else
2398 {
2399 /*
2400 * Map the guest page table.
2401 */
2402 PGSTPT pPTSrc;
2403 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2404 if (RT_SUCCESS(rc))
2405 {
2406 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2407 const GSTPTE PteSrc = *pPteSrc;
2408 if (pPteSrc->n.u1Present)
2409 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2410 }
2411 AssertRC(rc);
2412 }
2413 }
2414 return VINF_EM_RAW_GUEST_TRAP;
2415}
2416#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2417
2418
2419/**
2420 * Sync a shadow page table.
2421 *
2422 * The shadow page table is not present. This includes the case where
2423 * there is a conflict with a mapping.
2424 *
2425 * @returns VBox status code.
2426 * @param pVCpu The VMCPU handle.
2427 * @param iPDSrc Page directory index.
2428 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2429 * Assume this is a temporary mapping.
2430 * @param GCPtrPage GC Pointer of the page that caused the fault
2431 */
2432PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2433{
2434 PVM pVM = pVCpu->CTX_SUFF(pVM);
2435 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2436
2437 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2438 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2439 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2440
2441 Assert(PGMIsLocked(pVM));
2442
2443#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2444 || PGM_GST_TYPE == PGM_TYPE_PAE \
2445 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2446 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2447 && PGM_SHW_TYPE != PGM_TYPE_EPT
2448
2449 int rc = VINF_SUCCESS;
2450
2451 /*
2452 * Validate input a little bit.
2453 */
2454 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2455# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2456 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2457 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2458
2459 /* Fetch the pgm pool shadow descriptor. */
2460 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2461 Assert(pShwPde);
2462
2463# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2464 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2465 PPGMPOOLPAGE pShwPde = NULL;
2466 PX86PDPAE pPDDst;
2467 PSHWPDE pPdeDst;
2468
2469 /* Fetch the pgm pool shadow descriptor. */
2470 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2471 AssertRCSuccessReturn(rc, rc);
2472 Assert(pShwPde);
2473
2474 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2475 pPdeDst = &pPDDst->a[iPDDst];
2476
2477# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2478 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2479 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2480 PX86PDPAE pPDDst;
2481 PX86PDPT pPdptDst;
2482 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2483 AssertRCSuccessReturn(rc, rc);
2484 Assert(pPDDst);
2485 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2486# endif
2487 SHWPDE PdeDst = *pPdeDst;
2488
2489# if PGM_GST_TYPE == PGM_TYPE_AMD64
2490 /* Fetch the pgm pool shadow descriptor. */
2491 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2492 Assert(pShwPde);
2493# endif
2494
2495# ifndef PGM_WITHOUT_MAPPINGS
2496 /*
2497 * Check for conflicts.
2498 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2499 * HC: Simply resolve the conflict.
2500 */
2501 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2502 {
2503 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2504# ifndef IN_RING3
2505 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2506 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2507 return VERR_ADDRESS_CONFLICT;
2508# else
2509 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2510 Assert(pMapping);
2511# if PGM_GST_TYPE == PGM_TYPE_32BIT
2512 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2513# elif PGM_GST_TYPE == PGM_TYPE_PAE
2514 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2515# else
2516 AssertFailed(); /* can't happen for amd64 */
2517# endif
2518 if (RT_FAILURE(rc))
2519 {
2520 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2521 return rc;
2522 }
2523 PdeDst = *pPdeDst;
2524# endif
2525 }
2526# else /* PGM_WITHOUT_MAPPINGS */
2527 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2528# endif /* PGM_WITHOUT_MAPPINGS */
2529 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2530
2531# if defined(IN_RC)
2532 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2533 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2534# endif
2535
2536 /*
2537 * Sync page directory entry.
2538 */
2539 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2540 if (PdeSrc.n.u1Present)
2541 {
2542 /*
2543 * Allocate & map the page table.
2544 */
2545 PSHWPT pPTDst;
2546# if PGM_GST_TYPE == PGM_TYPE_AMD64
2547 const bool fPageTable = !PdeSrc.b.u1Size;
2548# else
2549 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2550# endif
2551 PPGMPOOLPAGE pShwPage;
2552 RTGCPHYS GCPhys;
2553 if (fPageTable)
2554 {
2555 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2556# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2557 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2558 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2559# endif
2560 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2561 }
2562 else
2563 {
2564 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2565# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2566 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2567 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2568# endif
2569 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2570 }
2571 if (rc == VINF_SUCCESS)
2572 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2573 else if (rc == VINF_PGM_CACHED_PAGE)
2574 {
2575 /*
2576 * The PT was cached, just hook it up.
2577 */
2578 if (fPageTable)
2579 PdeDst.u = pShwPage->Core.Key
2580 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2581 else
2582 {
2583 PdeDst.u = pShwPage->Core.Key
2584 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2585 /* (see explanation and assumptions further down.) */
2586 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2587 {
2588 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2589 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2590 PdeDst.b.u1Write = 0;
2591 }
2592 }
2593 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2594# if defined(IN_RC)
2595 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2596# endif
2597 return VINF_SUCCESS;
2598 }
2599 else if (rc == VERR_PGM_POOL_FLUSHED)
2600 {
2601 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2602# if defined(IN_RC)
2603 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2604# endif
2605 return VINF_PGM_SYNC_CR3;
2606 }
2607 else
2608 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
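 /* Hook up the new shadow page table: keep only the AVL bits of the old PDE and point it at the freshly allocated page; the remaining attributes are filled in per 4KB/4MB case below. */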
2609 PdeDst.u &= X86_PDE_AVL_MASK;
2610 PdeDst.u |= pShwPage->Core.Key;
2611
2612 /*
2613 * Page directory has been accessed (this is a fault situation, remember).
2614 */
2615 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2616 if (fPageTable)
2617 {
2618 /*
2619 * Page table - 4KB.
2620 *
2621 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2622 */
2623 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2624 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2625 PGSTPT pPTSrc;
2626 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2627 if (RT_SUCCESS(rc))
2628 {
2629 /*
2630 * Start by syncing the page directory entry so CSAM's TLB trick works.
2631 */
2632 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2633 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2634 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2635# if defined(IN_RC)
2636 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2637# endif
2638
2639 /*
2640 * Directory/page user or supervisor privilege: (same goes for read/write)
2641 *
2642 * Directory Page Combined
2643 * U/S U/S U/S
2644 * 0 0 0
2645 * 0 1 0
2646 * 1 0 0
2647 * 1 1 1
2648 *
2649 * Simple AND operation. Table listed for completeness.
2650 *
2651 */
2652 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2653# ifdef PGM_SYNC_N_PAGES
2654 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2655 unsigned iPTDst = iPTBase;
2656 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2657 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2658 iPTDst = 0;
2659 else
2660 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2661# else /* !PGM_SYNC_N_PAGES */
2662 unsigned iPTDst = 0;
2663 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2664# endif /* !PGM_SYNC_N_PAGES */
2665# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2666 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2667 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2668# else
2669 const unsigned offPTSrc = 0;
2670# endif
2671 for (; iPTDst < iPTDstEnd; iPTDst++)
2672 {
2673 const unsigned iPTSrc = iPTDst + offPTSrc;
2674 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2675
2676 if (PteSrc.n.u1Present) /* we've already cleared it above */
2677 {
2678# ifndef IN_RING0
2679 /*
2680 * Assume kernel code is marked as supervisor (not user level), is executed
2681 * using a conforming code selector, and is marked read-only.
2682 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2683 */
2684 PPGMPAGE pPage;
2685 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2686 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2687 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2688 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2689 )
2690# endif
2691 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2692 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2693 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2694 PteSrc.n.u1Present,
2695 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2696 PteSrc.n.u1User & PdeSrc.n.u1User,
2697 (uint64_t)PteSrc.u,
2698 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2699 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2700 }
2701 } /* for PTEs */
2702 }
2703 }
2704 else
2705 {
2706 /*
2707 * Big page - 2/4MB.
2708 *
2709 * We'll walk the ram range list in parallel and optimize lookups.
2710 * We will only sync one shadow page table at a time.
2711 */
2712 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2713
2714 /**
2715 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2716 */
2717
2718 /*
2719 * Start by syncing the page directory entry.
2720 */
2721 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2722 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2723
2724 /*
2725 * If the page is not flagged as dirty and is writable, then make it read-only
2726 * at PD level, so we can set the dirty bit when the page is modified.
2727 *
2728 * ASSUMES that page access handlers are implemented on page table entry level.
2729 * Thus we will first catch the dirty access and set PDE.D and restart. If
2730 * there is an access handler, we'll trap again and let it work on the problem.
2731 */
2732 /** @todo move the above stuff to a section in the PGM documentation. */
2733 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2734 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2735 {
2736 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2737 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2738 PdeDst.b.u1Write = 0;
2739 }
2740 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2741# if defined(IN_RC)
2742 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2743# endif
2744
2745 /*
2746 * Fill the shadow page table.
2747 */
2748 /* Get address and flags from the source PDE. */
2749 SHWPTE PteDstBase;
2750 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2751
2752 /* Loop thru the entries in the shadow PT. */
2753 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2754 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2755 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2756 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
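 /* Walk the shadow PT and the ram range list in lock step: GCPhys advances one page per PTE, pRam is advanced when we pass the end of the current range, and holes get zero PTEs (handled later via #PF). */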
2757 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2758 unsigned iPTDst = 0;
2759 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2760 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2761 {
2762 /* Advance ram range list. */
2763 while (pRam && GCPhys > pRam->GCPhysLast)
2764 pRam = pRam->CTX_SUFF(pNext);
2765 if (pRam && GCPhys >= pRam->GCPhys)
2766 {
2767 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2768 do
2769 {
2770 /* Make shadow PTE. */
2771 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2772 SHWPTE PteDst;
2773
2774# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2775 /* Try to make the page writable if necessary. */
2776 if ( PteDstBase.n.u1Write
2777 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2778 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2779 {
2780 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2781 AssertRCReturn(rc, rc);
2782 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2783 break;
2784 }
2785# endif
2786
2787 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2788 {
2789 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2790 {
2791 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2792 PteDst.n.u1Write = 0;
2793 }
2794 else
2795 PteDst.u = 0;
2796 }
2797# ifndef IN_RING0
2798 /*
2799 * Assume kernel code is marked as supervisor (not user level) and is executed
2800 * using a conforming code selector. Don't require read-only, as that would assume the
2801 * whole 4MB is code or readonly data. Linux enables write access for its large pages.
2802 */
2803 else if ( !PdeSrc.n.u1User
2804 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2805 PteDst.u = 0;
2806# endif
2807 else
2808 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2809
2810 /* Only map writable pages writable. */
2811 if ( PteDst.n.u1Write
2812 && PteDst.n.u1Present
2813 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2814 {
2815 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2816 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2817 }
2818
2819# ifdef PGMPOOL_WITH_USER_TRACKING
2820 if (PteDst.n.u1Present)
2821 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2822# endif
2823 /* commit it */
2824 pPTDst->a[iPTDst] = PteDst;
2825 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2826 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2827 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2828
2829 /* advance */
2830 GCPhys += PAGE_SIZE;
2831 iHCPage++;
2832 iPTDst++;
2833 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2834 && GCPhys <= pRam->GCPhysLast);
2835 }
2836 else if (pRam)
2837 {
2838 Log(("Invalid pages at %RGp\n", GCPhys));
2839 do
2840 {
2841 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2842 GCPhys += PAGE_SIZE;
2843 iPTDst++;
2844 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2845 && GCPhys < pRam->GCPhys);
2846 }
2847 else
2848 {
2849 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2850 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2851 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2852 }
2853 } /* while more PTEs */
2854 } /* 4KB / 4MB */
2855 }
2856 else
2857 AssertRelease(!PdeDst.n.u1Present);
2858
2859 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2860 if (RT_FAILURE(rc))
2861 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2862 return rc;
2863
2864#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2865 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2866 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2867 && !defined(IN_RC)
2868
2869 /*
2870 * Validate input a little bit.
2871 */
2872 int rc = VINF_SUCCESS;
2873# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2874 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2875 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2876
2877 /* Fetch the pgm pool shadow descriptor. */
2878 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2879 Assert(pShwPde);
2880
2881# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2882 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2883 PPGMPOOLPAGE pShwPde;
2884 PX86PDPAE pPDDst;
2885 PSHWPDE pPdeDst;
2886
2887 /* Fetch the pgm pool shadow descriptor. */
2888 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2889 AssertRCSuccessReturn(rc, rc);
2890 Assert(pShwPde);
2891
2892 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2893 pPdeDst = &pPDDst->a[iPDDst];
2894
2895# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2896 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2897 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2898 PX86PDPAE pPDDst;
2899 PX86PDPT pPdptDst;
2900 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2901 AssertRCSuccessReturn(rc, rc);
2902 Assert(pPDDst);
2903 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2904
2905 /* Fetch the pgm pool shadow descriptor. */
2906 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2907 Assert(pShwPde);
2908
2909# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2910 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2911 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2912 PEPTPD pPDDst;
2913 PEPTPDPT pPdptDst;
2914
2915 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2916 if (rc != VINF_SUCCESS)
2917 {
2918 AssertRC(rc);
2919 return rc;
2920 }
2921 Assert(pPDDst);
2922 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2923
2924 /* Fetch the pgm pool shadow descriptor. */
2925 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2926 Assert(pShwPde);
2927# endif
2928 SHWPDE PdeDst = *pPdeDst;
2929
2930 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2931 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2932
2933 GSTPDE PdeSrc;
2934 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2935 PdeSrc.n.u1Present = 1;
2936 PdeSrc.n.u1Write = 1;
2937 PdeSrc.n.u1Accessed = 1;
2938 PdeSrc.n.u1User = 1;
2939
2940 /*
2941 * Allocate & map the page table.
2942 */
2943 PSHWPT pPTDst;
2944 PPGMPOOLPAGE pShwPage;
2945 RTGCPHYS GCPhys;
2946
2947 /* Virtual address = physical address */
2948 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
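 /* Round the address down to the region covered by a single shadow page table; pgmPoolAlloc uses it when looking for a cached page. */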
2949 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2950
2951 if ( rc == VINF_SUCCESS
2952 || rc == VINF_PGM_CACHED_PAGE)
2953 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2954 else
2955 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2956
2957 PdeDst.u &= X86_PDE_AVL_MASK;
2958 PdeDst.u |= pShwPage->Core.Key;
2959 PdeDst.n.u1Present = 1;
2960 PdeDst.n.u1Write = 1;
2961# if PGM_SHW_TYPE == PGM_TYPE_EPT
2962 PdeDst.n.u1Execute = 1;
2963# else
2964 PdeDst.n.u1User = 1;
2965 PdeDst.n.u1Accessed = 1;
2966# endif
2967 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2968
2969 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2970 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2971 return rc;
2972
2973#else
2974 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2975 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2976 return VERR_INTERNAL_ERROR;
2977#endif
2978}
2979
2980
2981
2982/**
2983 * Prefetch a page/set of pages.
2984 *
2985 * Typically used to sync commonly used pages before entering raw mode
2986 * after a CR3 reload.
2987 *
2988 * @returns VBox status code.
2989 * @param pVCpu The VMCPU handle.
2990 * @param GCPtrPage Page to invalidate.
2991 */
2992PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
2993{
2994#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2995 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
2996 /*
2997 * Check that all Guest levels thru the PDE are present, getting the
2998 * PD and PDE in the process.
2999 */
3000 int rc = VINF_SUCCESS;
3001# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3002# if PGM_GST_TYPE == PGM_TYPE_32BIT
3003 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3004 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3005# elif PGM_GST_TYPE == PGM_TYPE_PAE
3006 unsigned iPDSrc;
3007 X86PDPE PdpeSrc;
3008 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3009 if (!pPDSrc)
3010 return VINF_SUCCESS; /* not present */
3011# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3012 unsigned iPDSrc;
3013 PX86PML4E pPml4eSrc;
3014 X86PDPE PdpeSrc;
3015 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3016 if (!pPDSrc)
3017 return VINF_SUCCESS; /* not present */
3018# endif
3019 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3020# else
3021 PGSTPD pPDSrc = NULL;
3022 const unsigned iPDSrc = 0;
3023 GSTPDE PdeSrc;
3024
3025 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3026 PdeSrc.n.u1Present = 1;
3027 PdeSrc.n.u1Write = 1;
3028 PdeSrc.n.u1Accessed = 1;
3029 PdeSrc.n.u1User = 1;
3030# endif
3031
3032 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3033 {
3034 PVM pVM = pVCpu->CTX_SUFF(pVM);
3035 pgmLock(pVM);
3036
3037# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3038 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3039# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3040 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3041 PX86PDPAE pPDDst;
3042 X86PDEPAE PdeDst;
3043# if PGM_GST_TYPE != PGM_TYPE_PAE
3044 X86PDPE PdpeSrc;
3045
3046 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3047 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3048# endif
3049 int rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3050 if (rc != VINF_SUCCESS)
3051 {
3052 pgmUnlock(pVM);
3053 AssertRC(rc);
3054 return rc;
3055 }
3056 Assert(pPDDst);
3057 PdeDst = pPDDst->a[iPDDst];
3058
3059# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3060 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3061 PX86PDPAE pPDDst;
3062 X86PDEPAE PdeDst;
3063
3064# if PGM_GST_TYPE == PGM_TYPE_PROT
3065 /* AMD-V nested paging */
3066 X86PML4E Pml4eSrc;
3067 X86PDPE PdpeSrc;
3068 PX86PML4E pPml4eSrc = &Pml4eSrc;
3069
3070 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3071 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3072 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3073# endif
3074
3075 int rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3076 if (rc != VINF_SUCCESS)
3077 {
3078 pgmUnlock(pVM);
3079 AssertRC(rc);
3080 return rc;
3081 }
3082 Assert(pPDDst);
3083 PdeDst = pPDDst->a[iPDDst];
3084# endif
3085 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3086 {
3087 if (!PdeDst.n.u1Present)
3088 {
3089 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3090 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3091 }
3092 else
3093 {
3094 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3095 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3096 * makes no sense to prefetch more than one page.
3097 */
3098 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3099 if (RT_SUCCESS(rc))
3100 rc = VINF_SUCCESS;
3101 }
3102 }
3103 pgmUnlock(pVM);
3104 }
3105 return rc;
3106
3107#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3108 return VINF_SUCCESS; /* ignore */
3109#endif
3110}
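
/*
 * Illustrative only: a sketch of how PrefetchPage is reached. The generic
 * wrapper in PGMAll.cpp dispatches through the per-mode function table; the
 * PGM_BTH_PFN macro use and the wrapper shape below are assumptions about
 * that dispatcher, not something defined in this file.
 *
 * @code
 *     // Hypothetical call site for prefetching one guest page:
 *     int rc = PGM_BTH_PFN(PrefetchPage, pVCpu)(pVCpu, GCPtrPage);
 *     if (RT_FAILURE(rc))
 *         Log(("prefetch of %RGv failed: %Rrc\n", GCPtrPage, rc));
 * @endcode
 */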
3111
3112
3113
3114
3115/**
3116 * Syncs a page during a PGMVerifyAccess() call.
3117 *
3118 * @returns VBox status code (informational status codes included).
3119 * @param pVCpu The VMCPU handle.
3120 * @param GCPtrPage The address of the page to sync.
3121 * @param fPage The effective guest page flags.
3122 * @param uErr The trap error code.
3123 */
3124PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3125{
3126 PVM pVM = pVCpu->CTX_SUFF(pVM);
3127
3128 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3129
3130 Assert(!HWACCMIsNestedPagingActive(pVM));
3131#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3132 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3133
3134# ifndef IN_RING0
3135 if (!(fPage & X86_PTE_US))
3136 {
3137 /*
3138 * Mark this page as safe.
3139 */
3140 /** @todo not correct for pages that contain both code and data!! */
3141 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3142 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3143 }
3144# endif
3145
3146 /*
3147 * Get guest PD and index.
3148 */
3149# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3150# if PGM_GST_TYPE == PGM_TYPE_32BIT
3151 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3152 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3153# elif PGM_GST_TYPE == PGM_TYPE_PAE
3154 unsigned iPDSrc = 0;
3155 X86PDPE PdpeSrc;
3156 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3157
3158 if (!pPDSrc)
3159 {
3160 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3161 return VINF_EM_RAW_GUEST_TRAP;
3162 }
3163# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3164 unsigned iPDSrc;
3165 PX86PML4E pPml4eSrc;
3166 X86PDPE PdpeSrc;
3167 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3168 if (!pPDSrc)
3169 {
3170 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3171 return VINF_EM_RAW_GUEST_TRAP;
3172 }
3173# endif
3174# else
3175 PGSTPD pPDSrc = NULL;
3176 const unsigned iPDSrc = 0;
3177# endif
3178 int rc = VINF_SUCCESS;
3179
3180 pgmLock(pVM);
3181
3182 /*
3183 * First check if the shadow pd is present.
3184 */
3185# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3186 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3187# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3188 PX86PDEPAE pPdeDst;
3189 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3190 PX86PDPAE pPDDst;
3191# if PGM_GST_TYPE != PGM_TYPE_PAE
3192 X86PDPE PdpeSrc;
3193
3194 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3195 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3196# endif
3197 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3198 if (rc != VINF_SUCCESS)
3199 {
3200 pgmUnlock(pVM);
3201 AssertRC(rc);
3202 return rc;
3203 }
3204 Assert(pPDDst);
3205 pPdeDst = &pPDDst->a[iPDDst];
3206
3207# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3208 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3209 PX86PDPAE pPDDst;
3210 PX86PDEPAE pPdeDst;
3211
3212# if PGM_GST_TYPE == PGM_TYPE_PROT
3213 /* AMD-V nested paging */
3214 X86PML4E Pml4eSrc;
3215 X86PDPE PdpeSrc;
3216 PX86PML4E pPml4eSrc = &Pml4eSrc;
3217
3218 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3219 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3220 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3221# endif
3222
3223 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3224 if (rc != VINF_SUCCESS)
3225 {
3226 pgmUnlock(pVM);
3227 AssertRC(rc);
3228 return rc;
3229 }
3230 Assert(pPDDst);
3231 pPdeDst = &pPDDst->a[iPDDst];
3232# endif
3233
3234# if defined(IN_RC)
3235 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3236 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3237# endif
3238
3239 if (!pPdeDst->n.u1Present)
3240 {
3241 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3242 if (rc != VINF_SUCCESS)
3243 {
3244# if defined(IN_RC)
3245 /* Release the dynamic pPdeDst mapping again before bailing out. */
3246 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3247# endif
3248 pgmUnlock(pVM);
3249 AssertRC(rc);
3250 return rc;
3251 }
3252 }
3253
3254# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3255 /* Check for dirty bit fault */
3256 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3257 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3258 Log(("PGMVerifyAccess: success (dirty)\n"));
3259 else
3260 {
3261 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3262# else
3263 {
3264 GSTPDE PdeSrc;
3265 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3266 PdeSrc.n.u1Present = 1;
3267 PdeSrc.n.u1Write = 1;
3268 PdeSrc.n.u1Accessed = 1;
3269 PdeSrc.n.u1User = 1;
3270
3271# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3272 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3273 if (uErr & X86_TRAP_PF_US)
3274 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3275 else /* supervisor */
3276 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3277
3278 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3279 if (RT_SUCCESS(rc))
3280 {
3281 /* Page was successfully synced */
3282 Log2(("PGMVerifyAccess: success (sync)\n"));
3283 rc = VINF_SUCCESS;
3284 }
3285 else
3286 {
3287 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3288 rc = VINF_EM_RAW_GUEST_TRAP;
3289 }
3290 }
3291# if defined(IN_RC)
3292 /* Release the dynamic pPdeDst mapping again. */
3293 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3294# endif
3295 pgmUnlock(pVM);
3296 return rc;
3297
3298#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3299
3300 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3301 return VERR_INTERNAL_ERROR;
3302#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3303}
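
/*
 * Rough outline of the function above, for orientation: the caller (the
 * ring-3 PGMVerifyAccess path; the exact entry point name is an assumption
 * here) hands us a guest address plus the effective page flags. We make sure
 * the shadow PDE exists (SyncPT), let CheckPageFault handle a pure dirty-bit
 * fault, and otherwise sync the single page, mapping any remaining failure to
 * VINF_EM_RAW_GUEST_TRAP so the caller can reflect a trap to the guest.
 */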
3304
3305
3306#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3307# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3308/**
3309 * Figures out which kind of shadow page this guest PDE warrants.
3310 *
3311 * @returns Shadow page kind.
3312 * @param pPdeSrc The guest PDE in question.
3313 * @param cr4 The current guest cr4 value.
3314 */
3315DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3316{
3317# if PGM_GST_TYPE == PGM_TYPE_AMD64
3318 if (!pPdeSrc->n.u1Size)
3319# else
3320 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3321# endif
3322 return BTH_PGMPOOLKIND_PT_FOR_PT;
3323 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3324 //{
3325 // case 0:
3326 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3327 // case X86_PDE4M_RW:
3328 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3329 // case X86_PDE4M_US:
3330 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3331 // case X86_PDE4M_RW | X86_PDE4M_US:
3332 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3333# if 0
3334 // case X86_PDE4M_PAE_NX:
3335 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3336 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3337 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3338 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3339 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3340 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3341 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3342# endif
3343 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3344 //}
3345}
3346# endif
3347#endif
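
/*
 * Note on CalcPageKind above: with the attribute switch commented out it
 * degenerates to a two-way choice - BTH_PGMPOOLKIND_PT_FOR_PT for 4KB page
 * tables and BTH_PGMPOOLKIND_PT_FOR_BIG for 2/4MB pages (big pages are taken
 * only when PSE is enabled in cr4, or unconditionally for AMD64 guests).
 */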
3348
3349#undef MY_STAM_COUNTER_INC
3350#define MY_STAM_COUNTER_INC(a) do { } while (0)
3351
3352
3353/**
3354 * Syncs the paging hierarchy starting at CR3.
3355 *
3356 * @returns VBox status code, no specials.
3357 * @param pVCpu The VMCPU handle.
3358 * @param cr0 Guest context CR0 register
3359 * @param cr3 Guest context CR3 register
3360 * @param cr4 Guest context CR4 register
3361 * @param fGlobal Including global page directories or not
3362 */
3363PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3364{
3365 PVM pVM = pVCpu->CTX_SUFF(pVM);
3366
3367 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
3368 fGlobal = true; /* Change this CR3 reload to be a global one. */
3369
3370 LogFlow(("SyncCR3 %d\n", fGlobal));
3371
3372#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3373 /*
3374 * Update page access handlers.
3375 * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3376 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3377 * have to look into that later because it will have a bad influence on performance.
3378 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3379 * bird: Yes, but that won't work for aliases.
3380 */
3381 /** @todo this MUST go away. See #1557. */
3382 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3383 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3384 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3385#endif
3386
3387#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3388 /*
3389 * Nested / EPT - almost no work.
3390 */
3391 /** @todo check if this is really necessary; the call does it as well... */
3392 HWACCMFlushTLB(pVCpu);
3393 return VINF_SUCCESS;
3394
3395#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3396 /*
3397 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3398 * out the shadow parts when the guest modifies its tables.
3399 */
3400 return VINF_SUCCESS;
3401
3402#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3403
3404# ifdef PGM_WITHOUT_MAPPINGS
3405 Assert(pVM->pgm.s.fMappingsFixed);
3406 return VINF_SUCCESS;
3407# else
3408 /* Nothing to do when mappings are fixed. */
3409 if (pVM->pgm.s.fMappingsFixed)
3410 return VINF_SUCCESS;
3411
3412 int rc = PGMMapResolveConflicts(pVM);
3413 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3414 if (rc == VINF_PGM_SYNC_CR3)
3415 {
3416 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3417 return VINF_PGM_SYNC_CR3;
3418 }
3419# endif
3420 return VINF_SUCCESS;
3421#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3422}
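
/*
 * SyncCR3 in a nutshell: nested/EPT shadow modes only flush the hardware TLB,
 * AMD64 shadow paging returns immediately because the shadow structures are
 * zapped lazily when the guest edits its tables, and the remaining 32-bit/PAE
 * shadow modes merely resolve hypervisor mapping conflicts (unless mappings
 * are fixed or compiled out). The sketch below shows an assumed call through
 * the mode table; PGM_BTH_PFN and the wrapper shape are assumptions about
 * PGMAll.cpp rather than something defined here.
 *
 * @code
 *     // Hypothetical dispatch from the generic PGMSyncCR3 wrapper:
 *     rc = PGM_BTH_PFN(SyncCR3, pVCpu)(pVCpu, cr0, cr3, cr4, fGlobal);
 * @endcode
 */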
3423
3424
3425
3426
3427#ifdef VBOX_STRICT
3428#ifdef IN_RC
3429# undef AssertMsgFailed
3430# define AssertMsgFailed Log
3431#endif
3432#ifdef IN_RING3
3433# include <VBox/dbgf.h>
3434
3435/**
3436 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3437 *
3438 * @returns VBox status code (VINF_SUCCESS).
3439 * @param cr3 The root of the hierarchy.
3440 * @param cr4 The cr4 register value; only PAE and PSE are currently used.
3441 * @param fLongMode Set if long mode, false if not long mode.
3442 * @param cMaxDepth Number of levels to dump.
3443 * @param pHlp Pointer to the output functions.
3444 */
3445__BEGIN_DECLS
3446VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3447__END_DECLS
3448
3449#endif
3450
3451/**
3452 * Checks that the shadow page table is in sync with the guest one.
3453 *
3454 * @returns The number of errors.
3455 * @param pVM The virtual machine.
3456 * @param pVCpu The VMCPU handle.
3457 * @param cr3 Guest context CR3 register
3458 * @param cr4 Guest context CR4 register
3459 * @param GCPtr Where to start. Defaults to 0.
3460 * @param cb How much to check. Defaults to everything.
3461 */
3462PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3463{
3464#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3465 return 0;
3466#else
3467 unsigned cErrors = 0;
3468 PVM pVM = pVCpu->CTX_SUFF(pVM);
3469 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3470
3471#if PGM_GST_TYPE == PGM_TYPE_PAE
3472 /** @todo currently broken; crashes below somewhere */
3473 AssertFailed();
3474#endif
3475
3476#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3477 || PGM_GST_TYPE == PGM_TYPE_PAE \
3478 || PGM_GST_TYPE == PGM_TYPE_AMD64
3479
3480# if PGM_GST_TYPE == PGM_TYPE_AMD64
3481 bool fBigPagesSupported = true;
3482# else
3483 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
3484# endif
3485 PPGMCPU pPGM = &pVCpu->pgm.s;
3486 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3487 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3488# ifndef IN_RING0
3489 RTHCPHYS HCPhys; /* general usage. */
3490# endif
3491 int rc;
3492
3493 /*
3494 * Check that the Guest CR3 and all its mappings are correct.
3495 */
3496 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3497 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3498 false);
3499# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3500# if PGM_GST_TYPE == PGM_TYPE_32BIT
3501 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3502# else
3503 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3504# endif
3505 AssertRCReturn(rc, 1);
3506 HCPhys = NIL_RTHCPHYS;
3507 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3508 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3509# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3510 pgmGstGet32bitPDPtr(pPGM);
3511 RTGCPHYS GCPhys;
3512 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3513 AssertRCReturn(rc, 1);
3514 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3515# endif
3516# endif /* !IN_RING0 */
3517
3518 /*
3519 * Get and check the Shadow CR3.
3520 */
3521# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3522 unsigned cPDEs = X86_PG_ENTRIES;
3523 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3524# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3525# if PGM_GST_TYPE == PGM_TYPE_32BIT
3526 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3527# else
3528 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3529# endif
3530 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3531# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3532 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3533 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3534# endif
3535 if (cb != ~(RTGCPTR)0)
3536 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3537
3538/** @todo call the other two PGMAssert*() functions. */
3539
3540# if PGM_GST_TYPE == PGM_TYPE_AMD64
3541 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3542
3543 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3544 {
3545 PPGMPOOLPAGE pShwPdpt = NULL;
3546 PX86PML4E pPml4eSrc;
3547 PX86PML4E pPml4eDst;
3548 RTGCPHYS GCPhysPdptSrc;
3549
3550 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3551 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3552
3553 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3554 if (!pPml4eDst->n.u1Present)
3555 {
3556 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3557 continue;
3558 }
3559
3560 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3561 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3562
3563 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3564 {
3565 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3566 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3567 cErrors++;
3568 continue;
3569 }
3570
3571 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3572 {
3573 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3574 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3575 cErrors++;
3576 continue;
3577 }
3578
3579 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3580 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3581 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3582 {
3583 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3584 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3585 cErrors++;
3586 continue;
3587 }
3588# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3589 {
3590# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3591
3592# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3593 /*
3594 * Check the PDPTEs too.
3595 */
3596 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3597
3598 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3599 {
3600 unsigned iPDSrc;
3601 PPGMPOOLPAGE pShwPde = NULL;
3602 PX86PDPE pPdpeDst;
3603 RTGCPHYS GCPhysPdeSrc;
3604# if PGM_GST_TYPE == PGM_TYPE_PAE
3605 X86PDPE PdpeSrc;
3606 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3607 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3608# else
3609 PX86PML4E pPml4eSrc;
3610 X86PDPE PdpeSrc;
3611 PX86PDPT pPdptDst;
3612 PX86PDPAE pPDDst;
3613 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3614
3615 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3616 if (rc != VINF_SUCCESS)
3617 {
3618 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3619 GCPtr += 512 * _2M;
3620 continue; /* next PDPTE */
3621 }
3622 Assert(pPDDst);
3623# endif
3624 Assert(iPDSrc == 0);
3625
3626 pPdpeDst = &pPdptDst->a[iPdpt];
3627
3628 if (!pPdpeDst->n.u1Present)
3629 {
3630 GCPtr += 512 * _2M;
3631 continue; /* next PDPTE */
3632 }
3633
3634 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3635 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3636
3637 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3638 {
3639 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3640 GCPtr += 512 * _2M;
3641 cErrors++;
3642 continue;
3643 }
3644
3645 if (GCPhysPdeSrc != pShwPde->GCPhys)
3646 {
3647# if PGM_GST_TYPE == PGM_TYPE_AMD64
3648 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3649# else
3650 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3651# endif
3652 GCPtr += 512 * _2M;
3653 cErrors++;
3654 continue;
3655 }
3656
3657# if PGM_GST_TYPE == PGM_TYPE_AMD64
3658 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3659 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3660 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3661 {
3662 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3663 GCPtr += 512 * _2M;
3664 cErrors++;
3665 continue;
3666 }
3667# endif
3668
3669# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3670 {
3671# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3672# if PGM_GST_TYPE == PGM_TYPE_32BIT
3673 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3674# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3675 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3676# endif
3677# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3678 /*
3679 * Iterate the shadow page directory.
3680 */
3681 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3682 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3683
3684 for (;
3685 iPDDst < cPDEs;
3686 iPDDst++, GCPtr += cIncrement)
3687 {
3688# if PGM_SHW_TYPE == PGM_TYPE_PAE
3689 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3690# else
3691 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3692# endif
3693 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3694 {
3695 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3696 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3697 {
3698 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3699 cErrors++;
3700 continue;
3701 }
3702 }
3703 else if ( (PdeDst.u & X86_PDE_P)
3704 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3705 )
3706 {
3707 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3708 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3709 if (!pPoolPage)
3710 {
3711 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3712 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3713 cErrors++;
3714 continue;
3715 }
3716 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3717
3718 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3719 {
3720 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3721 GCPtr, (uint64_t)PdeDst.u));
3722 cErrors++;
3723 }
3724
3725 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3726 {
3727 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3728 GCPtr, (uint64_t)PdeDst.u));
3729 cErrors++;
3730 }
3731
3732 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3733 if (!PdeSrc.n.u1Present)
3734 {
3735 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3736 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3737 cErrors++;
3738 continue;
3739 }
3740
3741 if ( !PdeSrc.b.u1Size
3742 || !fBigPagesSupported)
3743 {
3744 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3745# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3746 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3747# endif
3748 }
3749 else
3750 {
3751# if PGM_GST_TYPE == PGM_TYPE_32BIT
3752 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3753 {
3754 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3755 GCPtr, (uint64_t)PdeSrc.u));
3756 cErrors++;
3757 continue;
3758 }
3759# endif
3760 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3761# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3762 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3763# endif
3764 }
3765
3766 if ( pPoolPage->enmKind
3767 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3768 {
3769 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3770 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3771 cErrors++;
3772 }
3773
3774 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3775 if (!pPhysPage)
3776 {
3777 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3778 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3779 cErrors++;
3780 continue;
3781 }
3782
3783 if (GCPhysGst != pPoolPage->GCPhys)
3784 {
3785 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3786 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3787 cErrors++;
3788 continue;
3789 }
3790
3791 if ( !PdeSrc.b.u1Size
3792 || !fBigPagesSupported)
3793 {
3794 /*
3795 * Page Table.
3796 */
3797 const GSTPT *pPTSrc;
3798 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3799 if (RT_FAILURE(rc))
3800 {
3801 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3802 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3803 cErrors++;
3804 continue;
3805 }
3806 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3807 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3808 {
3809 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3810 // (This problem will go away when/if we shadow multiple CR3s.)
3811 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3812 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3813 cErrors++;
3814 continue;
3815 }
3816 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3817 {
3818 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3819 GCPtr, (uint64_t)PdeDst.u));
3820 cErrors++;
3821 continue;
3822 }
3823
3824 /* iterate the page table. */
3825# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3826 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3827 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3828# else
3829 const unsigned offPTSrc = 0;
3830# endif
3831 for (unsigned iPT = 0, off = 0;
3832 iPT < RT_ELEMENTS(pPTDst->a);
3833 iPT++, off += PAGE_SIZE)
3834 {
3835 const SHWPTE PteDst = pPTDst->a[iPT];
3836
3837 /* skip not-present entries. */
3838 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3839 continue;
3840 Assert(PteDst.n.u1Present);
3841
3842 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3843 if (!PteSrc.n.u1Present)
3844 {
3845# ifdef IN_RING3
3846 PGMAssertHandlerAndFlagsInSync(pVM);
3847 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3848# endif
3849 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3850 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3851 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3852 cErrors++;
3853 continue;
3854 }
3855
3856 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3857# if 1 /** @todo sync accessed bit properly... */
3858 fIgnoreFlags |= X86_PTE_A;
3859# endif
3860
3861 /* match the physical addresses */
3862 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3863 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3864
3865# ifdef IN_RING3
3866 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3867 if (RT_FAILURE(rc))
3868 {
3869 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3870 {
3871 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3872 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3873 cErrors++;
3874 continue;
3875 }
3876 }
3877 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3878 {
3879 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3880 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3881 cErrors++;
3882 continue;
3883 }
3884# endif
3885
3886 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3887 if (!pPhysPage)
3888 {
3889# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3890 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3891 {
3892 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3893 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3894 cErrors++;
3895 continue;
3896 }
3897# endif
3898 if (PteDst.n.u1Write)
3899 {
3900 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3901 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3902 cErrors++;
3903 }
3904 fIgnoreFlags |= X86_PTE_RW;
3905 }
3906 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3907 {
3908 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3909 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3910 cErrors++;
3911 continue;
3912 }
3913
3914 /* flags */
3915 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3916 {
3917 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3918 {
3919 if (PteDst.n.u1Write)
3920 {
3921 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3922 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3923 cErrors++;
3924 continue;
3925 }
3926 fIgnoreFlags |= X86_PTE_RW;
3927 }
3928 else
3929 {
3930 if (PteDst.n.u1Present)
3931 {
3932 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3933 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3934 cErrors++;
3935 continue;
3936 }
3937 fIgnoreFlags |= X86_PTE_P;
3938 }
3939 }
3940 else
3941 {
3942 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3943 {
3944 if (PteDst.n.u1Write)
3945 {
3946 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3947 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3948 cErrors++;
3949 continue;
3950 }
3951 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3952 {
3953 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3954 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3955 cErrors++;
3956 continue;
3957 }
3958 if (PteDst.n.u1Dirty)
3959 {
3960 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3961 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3962 cErrors++;
3963 }
3964# if 0 /** @todo sync access bit properly... */
3965 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3966 {
3967 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3968 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3969 cErrors++;
3970 }
3971 fIgnoreFlags |= X86_PTE_RW;
3972# else
3973 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3974# endif
3975 }
3976 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3977 {
3978 /* access bit emulation (not implemented). */
3979 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3980 {
3981 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3982 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3983 cErrors++;
3984 continue;
3985 }
3986 if (!PteDst.n.u1Accessed)
3987 {
3988 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3989 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3990 cErrors++;
3991 }
3992 fIgnoreFlags |= X86_PTE_P;
3993 }
3994# ifdef DEBUG_sandervl
3995 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3996# endif
3997 }
3998
3999 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4000 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4001 )
4002 {
4003 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4004 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4005 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4006 cErrors++;
4007 continue;
4008 }
4009 } /* foreach PTE */
4010 }
4011 else
4012 {
4013 /*
4014 * Big Page.
4015 */
4016 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4017 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4018 {
4019 if (PdeDst.n.u1Write)
4020 {
4021 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4022 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4023 cErrors++;
4024 continue;
4025 }
4026 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4027 {
4028 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4029 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4030 cErrors++;
4031 continue;
4032 }
4033# if 0 /** @todo sync access bit properly... */
4034 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4035 {
4036 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4037 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4038 cErrors++;
4039 }
4040 fIgnoreFlags |= X86_PTE_RW;
4041# else
4042 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4043# endif
4044 }
4045 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4046 {
4047 /* access bit emulation (not implemented). */
4048 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4049 {
4050 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4051 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4052 cErrors++;
4053 continue;
4054 }
4055 if (!PdeDst.n.u1Accessed)
4056 {
4057 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4058 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4059 cErrors++;
4060 }
4061 fIgnoreFlags |= X86_PTE_P;
4062 }
4063
4064 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4065 {
4066 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4067 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4068 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4069 cErrors++;
4070 }
4071
4072 /* iterate the page table. */
4073 for (unsigned iPT = 0, off = 0;
4074 iPT < RT_ELEMENTS(pPTDst->a);
4075 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4076 {
4077 const SHWPTE PteDst = pPTDst->a[iPT];
4078
4079 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4080 {
4081 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4082 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4083 cErrors++;
4084 }
4085
4086 /* skip not-present entries. */
4087 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4088 continue;
4089
4090 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4091
4092 /* match the physical addresses */
4093 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4094
4095# ifdef IN_RING3
4096 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4097 if (RT_FAILURE(rc))
4098 {
4099 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4100 {
4101 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4102 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4103 cErrors++;
4104 }
4105 }
4106 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4107 {
4108 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4109 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4110 cErrors++;
4111 continue;
4112 }
4113# endif
4114 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4115 if (!pPhysPage)
4116 {
4117# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4118 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4119 {
4120 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4121 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4122 cErrors++;
4123 continue;
4124 }
4125# endif
4126 if (PteDst.n.u1Write)
4127 {
4128 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4129 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4130 cErrors++;
4131 }
4132 fIgnoreFlags |= X86_PTE_RW;
4133 }
4134 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4135 {
4136 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4137 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4138 cErrors++;
4139 continue;
4140 }
4141
4142 /* flags */
4143 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4144 {
4145 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4146 {
4147 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4148 {
4149 if (PteDst.n.u1Write)
4150 {
4151 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4152 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4153 cErrors++;
4154 continue;
4155 }
4156 fIgnoreFlags |= X86_PTE_RW;
4157 }
4158 }
4159 else
4160 {
4161 if (PteDst.n.u1Present)
4162 {
4163 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4164 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4165 cErrors++;
4166 continue;
4167 }
4168 fIgnoreFlags |= X86_PTE_P;
4169 }
4170 }
4171
4172 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4173 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4174 )
4175 {
4176 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4177 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4178 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4179 cErrors++;
4180 continue;
4181 }
4182 } /* for each PTE */
4183 }
4184 }
4185 /* not present */
4186
4187 } /* for each PDE */
4188
4189 } /* for each PDPTE */
4190
4191 } /* for each PML4E */
4192
4193# ifdef DEBUG
4194 if (cErrors)
4195 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4196# endif
4197
4198#endif /* GST == 32BIT, PAE or AMD64 */
4199 return cErrors;
4200
4201#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4202}
4203#endif /* VBOX_STRICT */
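
/*
 * AssertCR3 above is strict-build self checking: it walks the shadow
 * PML4Es/PDPTEs/PDEs/PTEs (as far as the current mode has them) and
 * cross-checks physical addresses, P/RW/US/NX bits and the dirty/accessed
 * tracking flags against the guest tables, returning the number of
 * mismatches rather than failing hard. A hedged usage sketch, assuming the
 * usual mode-table indirection:
 *
 * @code
 * # ifdef VBOX_STRICT
 *     unsigned cErrs = PGM_BTH_PFN(AssertCR3, pVCpu)(pVCpu, cr3, cr4, 0, ~(RTGCPTR)0);
 *     Assert(!cErrs);
 * # endif
 * @endcode
 */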
4204
4205
4206/**
4207 * Sets up the CR3 for shadow paging.
4208 *
4209 * @returns Strict VBox status code.
4210 * @retval VINF_SUCCESS.
4211 *
4212 * @param pVCpu The VMCPU handle.
4213 * @param GCPhysCR3 The physical address in the CR3 register.
4214 */
4215PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4216{
4217 PVM pVM = pVCpu->CTX_SUFF(pVM);
4218
4219 /* Update guest paging info. */
4220#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4221 || PGM_GST_TYPE == PGM_TYPE_PAE \
4222 || PGM_GST_TYPE == PGM_TYPE_AMD64
4223
4224 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4225
4226 /*
4227 * Map the page CR3 points at.
4228 */
4229 RTHCPTR HCPtrGuestCR3;
4230 RTHCPHYS HCPhysGuestCR3;
4231 pgmLock(pVM);
4232 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4233 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4234 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4235 /** @todo this needs some reworking wrt. locking. */
4236# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4237 HCPtrGuestCR3 = NIL_RTHCPTR;
4238 int rc = VINF_SUCCESS;
4239# else
4240 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4241# endif
4242 pgmUnlock(pVM);
4243 if (RT_SUCCESS(rc))
4244 {
4245 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4246 if (RT_SUCCESS(rc))
4247 {
4248# ifdef IN_RC
4249 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4250# endif
4251# if PGM_GST_TYPE == PGM_TYPE_32BIT
4252 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4253# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4254 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4255# endif
4256 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4257
4258# elif PGM_GST_TYPE == PGM_TYPE_PAE
4259 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4260 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4261# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4262 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4263# endif
4264 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4265 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4266
4267 /*
4268 * Map the 4 PDs too.
4269 */
4270 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4271 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4272 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4273 {
4274 if (pGuestPDPT->a[i].n.u1Present)
4275 {
4276 RTHCPTR HCPtr;
4277 RTHCPHYS HCPhys;
4278 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4279 pgmLock(pVM);
4280 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4281 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4282 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4283# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4284 HCPtr = NIL_RTHCPTR;
4285 int rc2 = VINF_SUCCESS;
4286# else
4287 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4288# endif
4289 pgmUnlock(pVM);
4290 if (RT_SUCCESS(rc2))
4291 {
4292 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4293 AssertRCReturn(rc, rc);
4294
4295 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4296# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4297 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4298# endif
4299 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4300 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4301# ifdef IN_RC
4302 PGM_INVL_PG(pVCpu, GCPtr);
4303# endif
4304 continue;
4305 }
4306 AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4307 }
4308
4309 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4310# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4311 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4312# endif
4313 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4314 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4315# ifdef IN_RC
4316 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4317# endif
4318 }
4319
4320# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4321 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4322# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4323 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4324# endif
4325# endif
4326 }
4327 else
4328 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4329 }
4330 else
4331 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4332
4333#else /* prot/real stub */
4334 int rc = VINF_SUCCESS;
4335#endif
4336
4337 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4338# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4339 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4340 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4341 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4342 && PGM_GST_TYPE != PGM_TYPE_PROT))
4343
4344 Assert(!HWACCMIsNestedPagingActive(pVM));
4345
4346 /*
4347 * Update the shadow root page as well since that's not fixed.
4348 */
4349 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4350 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4351 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4352 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4353 PPGMPOOLPAGE pNewShwPageCR3;
4354
4355 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4356 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4357 AssertFatalRC(rc);
4358 rc = VINF_SUCCESS;
4359
4360 /* Mark the page as locked; disallow flushing. */
4361 pgmPoolLockPage(pPool, pNewShwPageCR3);
4362
4363# ifdef IN_RC
4364 /* NOTE: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4365 bool fLog = VMMGCLogDisable(pVM);
4366 pgmLock(pVM);
4367# endif
4368
4369 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4370 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4371 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4372# ifdef IN_RING0
4373 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4374 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4375# elif defined(IN_RC)
4376 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4377 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4378# else
4379 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4380 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4381# endif
4382
4383# ifndef PGM_WITHOUT_MAPPINGS
4384 /*
4385 * Apply all hypervisor mappings to the new CR3.
4386 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4387 * make sure we check for conflicts in the new CR3 root.
4388 */
4389# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4390 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4391# endif
4392 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4393 AssertRCReturn(rc, rc);
4394# endif
4395
4396 /* Set the current hypervisor CR3. */
4397 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4398 SELMShadowCR3Changed(pVM, pVCpu);
4399
4400# ifdef IN_RC
4401 pgmUnlock(pVM);
4402 VMMGCLogRestore(pVM, fLog);
4403# endif
4404
4405 /* Clean up the old CR3 root. */
4406 if (pOldShwPageCR3)
4407 {
4408 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4409# ifndef PGM_WITHOUT_MAPPINGS
4410 /* Remove the hypervisor mappings from the shadow page table. */
4411 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4412# endif
4413 /* Mark the page as unlocked; allow flushing again. */
4414 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4415
4416 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4417 }
4418
4419# endif
4420
4421 return rc;
4422}
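
/*
 * MapCR3 recap: the guest half maps the page(s) CR3 points at into all
 * contexts (for PAE guests also the four page directories), while the shadow
 * half allocates and locks a fresh root page from the pool, re-applies the
 * hypervisor mappings, publishes the new root via CPUMSetHyperCR3 and
 * SELMShadowCR3Changed, and finally unlocks and frees the previous root.
 * Assumed call shape (the mode-table indirection is an assumption about the
 * generic code, not defined in this file):
 *
 * @code
 *     rc = PGM_BTH_PFN(MapCR3, pVCpu)(pVCpu, GCPhysCR3 & GST_CR3_PAGE_MASK);
 * @endcode
 */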
4423
4424/**
4425 * Unmaps the shadow CR3.
4426 *
4427 * @returns VBox status, no specials.
4428 * @param pVCpu The VMCPU handle.
4429 */
4430PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4431{
4432 LogFlow(("UnmapCR3\n"));
4433
4434 int rc = VINF_SUCCESS;
4435 PVM pVM = pVCpu->CTX_SUFF(pVM);
4436
4437 /*
4438 * Update guest paging info.
4439 */
4440#if PGM_GST_TYPE == PGM_TYPE_32BIT
4441 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4442# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4443 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4444# endif
4445 pVCpu->pgm.s.pGst32BitPdRC = 0;
4446
4447#elif PGM_GST_TYPE == PGM_TYPE_PAE
4448 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4449# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4450 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4451# endif
4452 pVCpu->pgm.s.pGstPaePdptRC = 0;
4453 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4454 {
4455 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4456# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4457 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4458# endif
4459 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4460 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4461 }
4462
4463#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4464 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4465# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4466 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4467# endif
4468
4469#else /* prot/real mode stub */
4470 /* nothing to do */
4471#endif
4472
4473#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4474 /*
4475 * Update shadow paging info.
4476 */
4477# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4478 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4479 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4480
4481# if PGM_GST_TYPE != PGM_TYPE_REAL
4482 Assert(!HWACCMIsNestedPagingActive(pVM));
4483# endif
4484
4485# ifndef PGM_WITHOUT_MAPPINGS
4486 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4487 /* Remove the hypervisor mappings from the shadow page table. */
4488 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4489# endif
4490
4491 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4492 {
4493 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4494
4495 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4496
4497 /* Mark the page as unlocked; allow flushing again. */
4498 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4499
4500 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4501 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4502 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4503 pVCpu->pgm.s.pShwPageCR3RC = 0;
4504 pVCpu->pgm.s.iShwUser = 0;
4505 pVCpu->pgm.s.iShwUserTable = 0;
4506 }
4507# endif
4508#endif /* !IN_RC*/
4509
4510 return rc;
4511}
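
/*
 * UnmapCR3 is the counterpart of MapCR3: it drops the cached guest paging
 * pointers for every context and, outside RC, also deactivates the hypervisor
 * mappings on the current shadow root, unlocks it and returns it to the pool.
 * Assumed call shape (mode switching / teardown code in the generic PGM
 * sources; treat the macro use below as an assumption):
 *
 * @code
 *     rc = PGM_BTH_PFN(UnmapCR3, pVCpu)(pVCpu);
 * @endcode
 */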
4512