VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 29206

Last change on this file since 29206 was 28800, checked in by vboxsync, 15 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 199.5 KB
1/* $Id: PGMAllBth.h 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Oracle Corporation
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 */
19
20/*******************************************************************************
21* Internal Functions *
22*******************************************************************************/
23RT_C_DECLS_BEGIN
24PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
25PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
26PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
27PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
28PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
29PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
31PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
33#ifdef VBOX_STRICT
34PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
35#endif
36DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte);
37PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
38PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
39RT_C_DECLS_END
40
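/*
 * Note on the PGM_BTH_DECL / PGM_BTH_NAME template machinery used in the
 * prototypes above: this header is compiled once per (shadow, guest) paging
 * mode pair, and the macros mangle both mode names plus the context into the
 * function symbol. A rough, illustrative sketch only - the authoritative
 * definitions live in PGMInternal.h and the translation units including this
 * template:
 *
 *     #define PGM_BTH_NAME(name)       pgmBthPaePae##name        // e.g. PAE guest on PAE shadow
 *     #define PGM_BTH_DECL(type, name) type PGM_BTH_NAME(name)
 *
 *     PGM_BTH_DECL(int, Trap0eHandler)(...)  =>  int pgmBthPaePaeTrap0eHandler(...)
 */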
41
42/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
43#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
44# error "Invalid combination; PAE guest implies PAE shadow"
45#endif
46
47#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
48 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
49# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
58 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
59# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
60#endif
61
62
63#ifndef IN_RING3
64/**
65 * #PF Handler for raw-mode guest execution.
66 *
67 * @returns VBox status code (appropriate for trap handling and GC return).
68 *
69 * @param pVCpu VMCPU Handle.
70 * @param uErr The trap error code.
71 * @param pRegFrame Trap register frame.
72 * @param pvFault The fault address.
73 * @param pfLockTaken PGM lock taken here or not (out)
74 */
75PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
76{
77 PVM pVM = pVCpu->CTX_SUFF(pVM);
78
79 *pfLockTaken = false;
80
81# if defined(IN_RC) && defined(VBOX_STRICT)
82 PGMDynCheckLocks(pVM);
83# endif
84
85# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
86 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
87 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
88
89# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
90 /*
91 * Hide the instruction fetch trap indicator for now.
92 */
93 /** @todo NXE will change this and we must fix NXE in the switcher too! */
94 if (uErr & X86_TRAP_PF_ID)
95 {
96 uErr &= ~X86_TRAP_PF_ID;
97 TRPMSetErrorCode(pVCpu, uErr);
98 }
99# endif
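    /*
     * For reference, the uErr bits tested throughout this handler follow the
     * architectural x86 #PF error code layout:
     *     X86_TRAP_PF_P    - bit 0: 0 = page not present, 1 = protection violation
     *     X86_TRAP_PF_RW   - bit 1: the faulting access was a write
     *     X86_TRAP_PF_US   - bit 2: the access originated in user mode
     *     X86_TRAP_PF_RSVD - bit 3: reserved bit set in a paging structure (name assumed)
     *     X86_TRAP_PF_ID   - bit 4: instruction fetch (NX/EPT)
     */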
100
101 /*
102 * Get PDs.
103 */
104 int rc;
105# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
106# if PGM_GST_TYPE == PGM_TYPE_32BIT
107 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
108 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
109
110# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
111
112# if PGM_GST_TYPE == PGM_TYPE_PAE
113 unsigned iPDSrc = 0; /* initialized to shut up gcc */
114 X86PDPE PdpeSrc;
115 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
116
117# elif PGM_GST_TYPE == PGM_TYPE_AMD64
118 unsigned iPDSrc = 0; /* initialized to shut up gcc */
119 PX86PML4E pPml4eSrc;
120 X86PDPE PdpeSrc;
121 PGSTPD pPDSrc;
122
123 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
124 Assert(pPml4eSrc);
125# endif
126
127 /* Quick check for a valid guest trap. (PAE & AMD64) */
128 if (!pPDSrc)
129 {
130# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
131 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
132# else
133 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
134# endif
135 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
136 TRPMSetErrorCode(pVCpu, uErr);
137 return VINF_EM_RAW_GUEST_TRAP;
138 }
139# endif
140
141# else /* !PGM_WITH_PAGING */
142 PGSTPD pPDSrc = NULL;
143 const unsigned iPDSrc = 0;
144# endif /* !PGM_WITH_PAGING */
145
146# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
147 /*
148 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
149 * where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.
150 */
151 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
152 && MMHyperIsInsideArea(pVM, pvFault))
153 {
154 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
155 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
156 return VINF_EM_RAW_EMULATE_INSTR;
157 }
158# endif
159
160 /* First check for a genuine guest page fault. */
161# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
162 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
163 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
164 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
165 if (rc == VINF_EM_RAW_GUEST_TRAP)
166 {
167 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
168 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
169 return rc;
170 }
171# endif /* PGM_WITH_PAGING */
172
173 /* Take the big lock now. */
174 *pfLockTaken = true;
175 pgmLock(pVM);
176
177 /* Fetch the guest PDE */
178# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
179 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
180# else
181 GSTPDE PdeSrc;
182 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
183 PdeSrc.n.u1Present = 1;
184 PdeSrc.n.u1Write = 1;
185 PdeSrc.n.u1Accessed = 1;
186 PdeSrc.n.u1User = 1;
187# endif
188
189# if PGM_SHW_TYPE == PGM_TYPE_32BIT
190 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
191 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
192
193# elif PGM_SHW_TYPE == PGM_TYPE_PAE
194 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
195
196 PX86PDPAE pPDDst;
197# if PGM_GST_TYPE != PGM_TYPE_PAE
198 X86PDPE PdpeSrc;
199
200 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
201 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
202# endif
203 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
204 if (rc != VINF_SUCCESS)
205 {
206 AssertRC(rc);
207 return rc;
208 }
209 Assert(pPDDst);
210
211# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
212 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
213 PX86PDPAE pPDDst;
214# if PGM_GST_TYPE == PGM_TYPE_PROT
215 /* AMD-V nested paging */
216 X86PML4E Pml4eSrc;
217 X86PDPE PdpeSrc;
218 PX86PML4E pPml4eSrc = &Pml4eSrc;
219
220 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
221 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
222 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
223# endif
224
225 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
226 if (rc != VINF_SUCCESS)
227 {
228 AssertRC(rc);
229 return rc;
230 }
231 Assert(pPDDst);
232
233# elif PGM_SHW_TYPE == PGM_TYPE_EPT
234 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
235 PEPTPD pPDDst;
236
237 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
238 if (rc != VINF_SUCCESS)
239 {
240 AssertRC(rc);
241 return rc;
242 }
243 Assert(pPDDst);
244# endif
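    /*
     * At this point iPDDst/pPDDst address the shadow PDE covering pvFault for
     * whichever shadow paging mode this template instance was compiled for
     * (32-bit, PAE, AMD64 or EPT).
     */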
245
246# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
247 /* Dirty page handling. */
248 if (uErr & X86_TRAP_PF_RW) /* write fault? */
249 {
250 /*
251 * If we successfully correct the write protection fault due to dirty bit
252 * tracking, then return immediately.
253 */
254 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
255 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
256 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
257 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
258 {
259 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
260 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
261 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
262 return VINF_SUCCESS;
263 }
264 }
265
266# if 0 /* rarely useful; leave for debugging. */
267 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
268# endif
269# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
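    /*
     * Sketch of the dirty-bit tracking handled just above (simplified): a guest
     * page that is writable but not yet marked dirty is shadowed read-only and
     * tagged PGM_PTFLAGS_TRACK_DIRTY (see SyncPageWorker further down). The
     * guest's first write faults here, CheckDirtyPageFault sets the accessed and
     * dirty bits in the guest PTE, makes the shadow PTE writable and returns
     * VINF_PGM_HANDLED_DIRTY_BIT_FAULT, so the guest resumes without noticing.
     * The real logic also has to deal with big pages and write-monitored pages.
     */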
270
271 /*
272 * A common case is the not-present error caused by lazy page table syncing.
273 *
274 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
275 * so we can safely assume that the shadow PT is present when calling SyncPage later.
276 *
277 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
278 * of mapping conflict and defer to SyncCR3 in R3.
279 * (Again, we do NOT support access handlers for non-present guest pages.)
280 *
281 */
282 Assert(PdeSrc.n.u1Present);
283 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
284 && !pPDDst->a[iPDDst].n.u1Present
285 )
286 {
287 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
288 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
289 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
290 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
291 if (RT_SUCCESS(rc))
292 {
293 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
294 return rc;
295 }
296 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
297 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
298 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
299 return VINF_PGM_SYNC_CR3;
300 }
301
302# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
303 /*
304 * Check if this address is within any of our mappings.
305 *
306 * This is *very* fast and it's gonna save us a bit of effort below and prevent
307 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
308 * (BTW, it's impossible to have physical access handlers in a mapping.)
309 */
310 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
311 {
312 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
313 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
314 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
315 {
316 if (pvFault < pMapping->GCPtr)
317 break;
318 if (pvFault - pMapping->GCPtr < pMapping->cb)
319 {
320 /*
321 * The first thing we check is if we've got an undetected conflict.
322 */
323 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
324 {
325 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
326 while (iPT-- > 0)
327 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
328 {
329 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
330 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
331 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
332 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
333 return VINF_PGM_SYNC_CR3;
334 }
335 }
336
337 /*
338 * Check if the fault address is in a virtual page access handler range.
339 */
340 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
341 if ( pCur
342 && pvFault - pCur->Core.Key < pCur->cb
343 && uErr & X86_TRAP_PF_RW)
344 {
345# ifdef IN_RC
346 STAM_PROFILE_START(&pCur->Stat, h);
347 pgmUnlock(pVM);
348 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
349 pgmLock(pVM);
350 STAM_PROFILE_STOP(&pCur->Stat, h);
351# else
352 AssertFailed();
353 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
354# endif
355 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
356 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
357 return rc;
358 }
359
360 /*
361 * Pretend we're not here and let the guest handle the trap.
362 */
363 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
364 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
365 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
366 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
367 return VINF_EM_RAW_GUEST_TRAP;
368 }
369 }
370 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
371 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
372# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
373
374 /*
375 * Check if this fault address is flagged for special treatment,
376 * which means we'll have to figure out the physical address and
377 * check flags associated with it.
378 *
379 * ASSUME that we can limit any special access handling to pages
380 * in page tables which the guest believes to be present.
381 */
382 Assert(PdeSrc.n.u1Present);
383 {
384 RTGCPHYS GCPhys = NIL_RTGCPHYS;
385
386# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
387 if ( PdeSrc.b.u1Size
388# if PGM_GST_TYPE == PGM_TYPE_32BIT
389 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
390# endif
391 )
392 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
393 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
394 else
395 {
396 PGSTPT pPTSrc;
397 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
398 if (RT_SUCCESS(rc))
399 {
400 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
401 if (pPTSrc->a[iPTESrc].n.u1Present)
402 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
403 }
404 }
405# else
406 /* No paging so the fault address is the physical address */
407 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
408# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
409
410 /*
411 * If we have a GC address we'll check if it has any flags set.
412 */
413 if (GCPhys != NIL_RTGCPHYS)
414 {
415 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
416
417 PPGMPAGE pPage;
418 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
419 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
420 {
421 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
422 {
423 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
424 {
425 /*
426 * Physical page access handler.
427 */
428 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
429 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
430 if (pCur)
431 {
432# ifdef PGM_SYNC_N_PAGES
433 /*
434 * If the region is write protected and we got a page not present fault, then sync
435 * the pages. If the fault was caused by a read, then restart the instruction.
436 * In case of write access continue to the GC write handler.
437 *
438 * ASSUMES that there is only one handler per page or that they have similar write properties.
439 */
440 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
441 && !(uErr & X86_TRAP_PF_P))
442 {
443 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
444 if ( RT_FAILURE(rc)
445 || !(uErr & X86_TRAP_PF_RW)
446 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
447 {
448 AssertRC(rc);
449 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
450 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
451 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
452 return rc;
453 }
454 }
455# endif
456
457 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
458 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
459 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
460
461# if defined(IN_RC) || defined(IN_RING0)
462 if (pCur->CTX_SUFF(pfnHandler))
463 {
464 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
465# ifdef IN_RING0
466 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
467# else
468 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
469# endif
470 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
471 void *pvUser = pCur->CTX_SUFF(pvUser);
472
473 STAM_PROFILE_START(&pCur->Stat, h);
474 if (fLeaveLock)
475 pgmUnlock(pVM); /* @todo: Not entirely safe. */
476
477 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
478 if (fLeaveLock)
479 pgmLock(pVM);
480# ifdef VBOX_WITH_STATISTICS
481 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
482 if (pCur)
483 STAM_PROFILE_STOP(&pCur->Stat, h);
484# else
485 pCur = NULL; /* might be invalid by now. */
486# endif
487
488 }
489 else
490# endif
491 rc = VINF_EM_RAW_EMULATE_INSTR;
492
493 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
494 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
495 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
496 return rc;
497 }
498 }
499# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
500 else
501 {
502# ifdef PGM_SYNC_N_PAGES
503 /*
504 * If the region is write protected and we got a page not present fault, then sync
505 * the pages. If the fault was caused by a read, then restart the instruction.
506 * In case of write access continue to the GC write handler.
507 */
508 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
509 && !(uErr & X86_TRAP_PF_P))
510 {
511 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
512 if ( RT_FAILURE(rc)
513 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
514 || !(uErr & X86_TRAP_PF_RW))
515 {
516 AssertRC(rc);
517 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
518 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
519 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
520 return rc;
521 }
522 }
523# endif
524 /*
525 * Ok, it's a virtual page access handler.
526 *
527 * Since it's faster to search by address, we'll do that first
528 * and then retry by GCPhys if that fails.
529 */
530 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
531 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
532 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
533 */
534 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
535 if (pCur)
536 {
537 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
538 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
539 || !(uErr & X86_TRAP_PF_P)
540 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
541 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
542
543 if ( pvFault - pCur->Core.Key < pCur->cb
544 && ( uErr & X86_TRAP_PF_RW
545 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
546 {
547# ifdef IN_RC
548 STAM_PROFILE_START(&pCur->Stat, h);
549 pgmUnlock(pVM);
550 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
551 pgmLock(pVM);
552 STAM_PROFILE_STOP(&pCur->Stat, h);
553# else
554 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
555# endif
556 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
557 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
558 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
559 return rc;
560 }
561 /* Unhandled part of a monitored page */
562 }
563 else
564 {
565 /* Check by physical address. */
566 unsigned iPage;
567 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
568 &pCur, &iPage);
569 Assert(RT_SUCCESS(rc) || !pCur);
570 if ( pCur
571 && ( uErr & X86_TRAP_PF_RW
572 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
573 {
574 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
575# ifdef IN_RC
576 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
577 Assert(off < pCur->cb);
578 STAM_PROFILE_START(&pCur->Stat, h);
579 pgmUnlock(pVM);
580 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
581 pgmLock(pVM);
582 STAM_PROFILE_STOP(&pCur->Stat, h);
583# else
584 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
585# endif
586 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
587 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
588 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
589 return rc;
590 }
591 }
592 }
593# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
594
595 /*
596 * There is a handled area of the page, but this fault doesn't belong to it.
597 * We must emulate the instruction.
598 *
599 * To avoid a (non-fatal) failure in the interpreter and a fall back to the recompiler,
600 * we first check whether this was a page-not-present fault for a page with only
601 * write access handlers. Restart the instruction if it wasn't a write access.
602 */
603 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
604
605 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
606 && !(uErr & X86_TRAP_PF_P))
607 {
608 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
609 if ( RT_FAILURE(rc)
610 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
611 || !(uErr & X86_TRAP_PF_RW))
612 {
613 AssertRC(rc);
614 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
615 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
616 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
617 return rc;
618 }
619 }
620
621 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
622 * It's writing to an unhandled part of the LDT page several million times.
623 */
624 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
625 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
626 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
627 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
628 return rc;
629 } /* if any kind of handler */
630
631# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
632 if (uErr & X86_TRAP_PF_P)
633 {
634 /*
635 * The page isn't marked, but it might still be monitored by a virtual page access handler.
636 * (ASSUMES no temporary disabling of virtual handlers.)
637 */
638 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
639 * we should correct both the shadow page table and physical memory flags, and not only check for
640 * accesses within the handler region but for access to pages with virtual handlers. */
641 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
642 if (pCur)
643 {
644 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
645 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
646 || !(uErr & X86_TRAP_PF_P)
647 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
648 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
649
650 if ( pvFault - pCur->Core.Key < pCur->cb
651 && ( uErr & X86_TRAP_PF_RW
652 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
653 {
654# ifdef IN_RC
655 STAM_PROFILE_START(&pCur->Stat, h);
656 pgmUnlock(pVM);
657 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
658 pgmLock(pVM);
659 STAM_PROFILE_STOP(&pCur->Stat, h);
660# else
661 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
662# endif
663 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
664 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
665 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
666 return rc;
667 }
668 }
669 }
670# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
671 }
672 else
673 {
674 /*
675 * When the guest accesses invalid physical memory (e.g. probing
676 * of RAM or accessing a remapped MMIO range), then we'll fall
677 * back to the recompiler to emulate the instruction.
678 */
679 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
680 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
681 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
682 return VINF_EM_RAW_EMULATE_INSTR;
683 }
684
685 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
686
687# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
688 /*
689 * We are here only if page is present in Guest page tables and
690 * trap is not handled by our handlers.
691 *
692 * Check it for page out-of-sync situation.
693 */
694 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
695
696 if (!(uErr & X86_TRAP_PF_P))
697 {
698 /*
699 * Page is not present in our page tables.
700 * Try to sync it!
701 * BTW, fPageShw is invalid in this branch!
702 */
703 if (uErr & X86_TRAP_PF_US)
704 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
705 else /* supervisor */
706 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
707
708 if (PGM_PAGE_IS_BALLOONED(pPage))
709 {
710 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
711 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
712 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
713 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
714 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
715 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
716 return rc;
717 }
718# if defined(LOG_ENABLED) && !defined(IN_RING0)
719 RTGCPHYS GCPhys2;
720 uint64_t fPageGst2;
721 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
722 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
723 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
724# endif /* LOG_ENABLED */
725
726# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
727 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
728 {
729 uint64_t fPageGst;
730 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
731 if ( RT_SUCCESS(rc)
732 && !(fPageGst & X86_PTE_US))
733 {
734 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
735 if ( pvFault == (RTGCPTR)pRegFrame->eip
736 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
737# ifdef CSAM_DETECT_NEW_CODE_PAGES
738 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
739 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
740# endif /* CSAM_DETECT_NEW_CODE_PAGES */
741 )
742 {
743 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
744 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
745 if (rc != VINF_SUCCESS)
746 {
747 /*
748 * CSAM needs to perform a job in ring 3.
749 *
750 * Sync the page before going to the host context; otherwise we'll end up in a loop if
751 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
752 */
753 LogFlow(("CSAM ring 3 job\n"));
754 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
755 AssertRC(rc2);
756
757 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
758 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
759 return rc;
760 }
761 }
762# ifdef CSAM_DETECT_NEW_CODE_PAGES
763 else if ( uErr == X86_TRAP_PF_RW
764 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
765 && pRegFrame->ecx < 0x10000)
766 {
767 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
768 * to detect loading of new code pages.
769 */
770
771 /*
772 * Decode the instruction.
773 */
774 RTGCPTR PC;
775 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
776 if (rc == VINF_SUCCESS)
777 {
778 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
779 uint32_t cbOp;
780 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
781
782 /* For now we'll restrict this to rep movsw/d instructions */
783 if ( rc == VINF_SUCCESS
784 && pDis->pCurInstr->opcode == OP_MOVSWD
785 && (pDis->prefix & PREFIX_REP))
786 {
787 CSAMMarkPossibleCodePage(pVM, pvFault);
788 }
789 }
790 }
791# endif /* CSAM_DETECT_NEW_CODE_PAGES */
792
793 /*
794 * Mark this page as safe.
795 */
796 /** @todo not correct for pages that contain both code and data!! */
797 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
798 CSAMMarkPage(pVM, pvFault, true);
799 }
800 }
801# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
802 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
803 if (RT_SUCCESS(rc))
804 {
805 /* The page was successfully synced, return to the guest. */
806 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
807 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
808 return VINF_SUCCESS;
809 }
810 }
811 else /* uErr & X86_TRAP_PF_P: */
812 {
813 /*
814 * Write-protected pages are made writable when the guest performs its first
815 * write to them. This happens for pages that are shared, write monitored
816 * and not yet allocated.
817 *
818 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
819 * to physically monitored regions that are no longer valid.
820 * Assume for now this only applies to the read/write flag.
821 */
822 if ( RT_SUCCESS(rc)
823 && (uErr & X86_TRAP_PF_RW))
824 {
825 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
826 {
827 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
828 Assert(!PGM_PAGE_IS_ZERO(pPage));
829 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
830
831 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
832 if (rc != VINF_SUCCESS)
833 {
834 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
835 return rc;
836 }
837 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
838 return VINF_EM_NO_MEMORY;
839 }
840
841# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
842 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
843 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
844 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
845 {
846 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
847 uint64_t fPageGst;
848 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
849 if ( RT_SUCCESS(rc)
850 && !(fPageGst & X86_PTE_RW))
851 {
852 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
853 if (RT_SUCCESS(rc))
854 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
855 else
856 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
857 return rc;
858 }
859 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
860 }
861# endif
862 /// @todo count the above case; else
863 if (uErr & X86_TRAP_PF_US)
864 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
865 else /* supervisor */
866 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
867
868 /*
869 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
870 * page is not present, which is not true in this case.
871 */
872 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
873 if (RT_SUCCESS(rc))
874 {
875 /*
876 * Page was successfully synced, return to guest.
877 * First invalidate the page as it might be in the TLB.
878 */
879# if PGM_SHW_TYPE == PGM_TYPE_EPT
880 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
881# else
882 PGM_INVL_PG(pVCpu, pvFault);
883# endif
884# ifdef VBOX_STRICT
885 RTGCPHYS GCPhys2;
886 uint64_t fPageGst;
887 if (!HWACCMIsNestedPagingActive(pVM))
888 {
889 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
890 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
891 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
892 }
893 uint64_t fPageShw;
894 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
895 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
896# endif /* VBOX_STRICT */
897 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
898 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
899 return VINF_SUCCESS;
900 }
901 }
902
903# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
904# ifdef VBOX_STRICT
905 /*
906 * Check for VMM page flags vs. Guest page flags consistency.
907 * Currently only for debug purposes.
908 */
909 if (RT_SUCCESS(rc))
910 {
911 /* Get guest page flags. */
912 uint64_t fPageGst;
913 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
914 if (RT_SUCCESS(rc))
915 {
916 uint64_t fPageShw;
917 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
918
919 /*
920 * Compare page flags.
921 * Note: we have AVL, A, D bits desynched.
922 */
923 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
924 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
925 }
926 else
927 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
928 }
929 else
930 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
931# endif /* VBOX_STRICT */
932# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
933 }
934 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
935# endif /* PGM_OUT_OF_SYNC_IN_GC */
936 }
937 else /* GCPhys == NIL_RTGCPHYS */
938 {
939 /*
940 * Page not present in Guest OS or invalid page table address.
941 * This is potential virtual page access handler food.
942 *
943 * For the present we'll say that our access handlers don't
944 * work for this case - we've already discarded the page table
945 * not present case which is identical to this.
946 *
947 * When we perchance find we need this, we will probably have AVL
948 * trees (offset based) to operate on and we can measure their speed
949 * against mapping a page table and probably rearrange this handling
950 * a bit. (Like, searching virtual ranges before checking the
951 * physical address.)
952 */
953 }
954 }
955
956# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
957 /*
958 * Conclusion, this is a guest trap.
959 */
960 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
961 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
962 return VINF_EM_RAW_GUEST_TRAP;
963# else
964 /* present, but not a monitored page; perhaps the guest is probing physical memory */
965 return VINF_EM_RAW_EMULATE_INSTR;
966# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
967
968
969# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
970
971 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
972 return VERR_INTERNAL_ERROR;
973# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
974}
975#endif /* !IN_RING3 */
976
977
978/**
979 * Emulation of the invlpg instruction.
980 *
981 *
982 * @returns VBox status code.
983 *
984 * @param pVCpu The VMCPU handle.
985 * @param GCPtrPage Page to invalidate.
986 *
987 * @remark ASSUMES that the guest is updating before invalidating. This order
988 * isn't required by the CPU, so this is speculative and could cause
989 * trouble.
990 * @remark No TLB shootdown is done on any other VCPU as we assume that
991 * invlpg emulation is the *only* reason for calling this function.
992 * (The guest has to shoot down TLB entries on other CPUs itself)
993 * Currently true, but keep in mind!
994 *
995 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
996 */
997PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
998{
999#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1000 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1001 && PGM_SHW_TYPE != PGM_TYPE_EPT
1002 int rc;
1003 PVM pVM = pVCpu->CTX_SUFF(pVM);
1004 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1005
1006 Assert(PGMIsLockOwner(pVM));
1007
1008 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1009
1010# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1011 if (pPool->cDirtyPages)
1012 pgmPoolResetDirtyPages(pVM);
1013# endif
1014
1015 /*
1016 * Get the shadow PD entry and skip out if this PD isn't present.
1017 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1018 */
1019# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1020 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1021 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1022
1023 /* Fetch the pgm pool shadow descriptor. */
1024 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1025 Assert(pShwPde);
1026
1027# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1028 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1029 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1030
1031 /* If the shadow PDPE isn't present, then skip the invalidate. */
1032 if (!pPdptDst->a[iPdpt].n.u1Present)
1033 {
1034 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1035 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1036 return VINF_SUCCESS;
1037 }
1038
1039 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1040 PPGMPOOLPAGE pShwPde = NULL;
1041 PX86PDPAE pPDDst;
1042
1043 /* Fetch the pgm pool shadow descriptor. */
1044 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1045 AssertRCSuccessReturn(rc, rc);
1046 Assert(pShwPde);
1047
1048 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1049 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1050
1051# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1052 /* PML4 */
1053 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1054 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1055 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1056 PX86PDPAE pPDDst;
1057 PX86PDPT pPdptDst;
1058 PX86PML4E pPml4eDst;
1059 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1060 if (rc != VINF_SUCCESS)
1061 {
1062 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1063 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1064 return VINF_SUCCESS;
1065 }
1066 Assert(pPDDst);
1067
1068 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1069 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1070
1071 if (!pPdpeDst->n.u1Present)
1072 {
1073 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1074 return VINF_SUCCESS;
1075 }
1076
1077 /* Fetch the pgm pool shadow descriptor. */
1078 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1079 Assert(pShwPde);
1080
1081# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1082
1083 const SHWPDE PdeDst = *pPdeDst;
1084 if (!PdeDst.n.u1Present)
1085 {
1086 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1087 return VINF_SUCCESS;
1088 }
1089
1090# if defined(IN_RC)
1091 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1092 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1093# endif
1094
1095 /*
1096 * Get the guest PD entry and calc big page.
1097 */
1098# if PGM_GST_TYPE == PGM_TYPE_32BIT
1099 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1100 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1101 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1102# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1103 unsigned iPDSrc = 0;
1104# if PGM_GST_TYPE == PGM_TYPE_PAE
1105 X86PDPE PdpeSrc;
1106 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1107# else /* AMD64 */
1108 PX86PML4E pPml4eSrc;
1109 X86PDPE PdpeSrc;
1110 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1111# endif
1112 GSTPDE PdeSrc;
1113
1114 if (pPDSrc)
1115 PdeSrc = pPDSrc->a[iPDSrc];
1116 else
1117 PdeSrc.u = 0;
1118# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1119
1120# if PGM_GST_TYPE == PGM_TYPE_32BIT
1121 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1122# else
1123 const bool fIsBigPage = PdeSrc.b.u1Size;
1124# endif
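    /*
     * Note: a 32-bit guest only gets 4 MB pages when CR4.PSE is enabled (hence
     * the CPUMIsGuestPageSizeExtEnabled check), whereas PAE and AMD64 guests
     * always support 2 MB pages, so the PS bit alone is decisive there.
     */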
1125
1126# ifdef IN_RING3
1127 /*
1128 * If a CR3 Sync is pending we may ignore the invalidate page operation
1129 * depending on the kind of sync and if it's a global page or not.
1130 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1131 */
1132# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1133 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1134 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1135 && fIsBigPage
1136 && PdeSrc.b.u1Global
1137 )
1138 )
1139# else
1140 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1141# endif
1142 {
1143 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1144 return VINF_SUCCESS;
1145 }
1146# endif /* IN_RING3 */
1147
1148 /*
1149 * Deal with the Guest PDE.
1150 */
1151 rc = VINF_SUCCESS;
1152 if (PdeSrc.n.u1Present)
1153 {
1154 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1155 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1156# ifndef PGM_WITHOUT_MAPPING
1157 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1158 {
1159 /*
1160 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1161 */
1162 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1163 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1164 pgmLock(pVM);
1165 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1166 pgmUnlock(pVM);
1167 }
1168 else
1169# endif /* !PGM_WITHOUT_MAPPING */
1170 if (!fIsBigPage)
1171 {
1172 /*
1173 * 4KB - page.
1174 */
1175 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1176 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1177
1178# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1179 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1180 if (pShwPage->cModifications)
1181 pShwPage->cModifications = 1;
1182# endif
1183
1184# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1185 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1186 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1187# endif
1188 if (pShwPage->GCPhys == GCPhys)
1189 {
1190# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1191 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1192 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1193 if (pPT->a[iPTEDst].n.u1Present)
1194 {
1195 /* This is very unlikely with caching/monitoring enabled. */
1196 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK, iPTEDst);
1197 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1198 }
1199# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1200 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1201 if (RT_SUCCESS(rc))
1202 rc = VINF_SUCCESS;
1203# endif
1204 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1205 PGM_INVL_PG(pVCpu, GCPtrPage);
1206 }
1207 else
1208 {
1209 /*
1210 * The page table address changed.
1211 */
1212 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1213 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1214 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1215 ASMAtomicWriteSize(pPdeDst, 0);
1216 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1217 PGM_INVL_VCPU_TLBS(pVCpu);
1218 }
1219 }
1220 else
1221 {
1222 /*
1223 * 2/4MB - page.
1224 */
1225 /* Before freeing the page, check if anything really changed. */
1226 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1227 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1228# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1229 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1230 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1231# endif
1232 if ( pShwPage->GCPhys == GCPhys
1233 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1234 {
1235 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1236 /** @todo PAT */
1237 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1238 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1239 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1240 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1241 {
1242 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1243 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1244# if defined(IN_RC)
1245 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1246 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1247# endif
1248 return VINF_SUCCESS;
1249 }
1250 }
1251
1252 /*
1253 * Ok, the page table is present and it's been changed in the guest.
1254 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1255 * We could do this for some flushes in GC too, but we need an algorithm for
1256 * deciding which 4MB pages containing code likely to be executed very soon.
1257 */
1258 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1259 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1260 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1261 ASMAtomicWriteSize(pPdeDst, 0);
1262 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1263 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1264 }
1265 }
1266 else
1267 {
1268 /*
1269 * Page directory is not present, mark shadow PDE not present.
1270 */
1271 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1272 {
1273 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1274 ASMAtomicWriteSize(pPdeDst, 0);
1275 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1276 PGM_INVL_PG(pVCpu, GCPtrPage);
1277 }
1278 else
1279 {
1280 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1281 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1282 }
1283 }
1284# if defined(IN_RC)
1285 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1286 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1287# endif
1288 return rc;
1289
1290#else /* guest real and protected mode */
1291 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1292 return VINF_SUCCESS;
1293#endif
1294}
1295
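/*
 * Usage note (sketch): this per-mode worker is not called directly; invlpg
 * emulation goes through the mode-dispatched PGM API, roughly
 *
 *     PGMInvalidatePage(pVCpu, GCPtrPage)
 *
 * which resolves the current (shadow, guest) mode pair and invokes the matching
 * PGM_BTH_NAME(InvalidatePage) instance. The dispatch details are assumed here;
 * see PGMAll.cpp for the authoritative code.
 */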
1296
1297/**
1298 * Update the tracking of shadowed pages.
1299 *
1300 * @param pVCpu The VMCPU handle.
1301 * @param pShwPage The shadow page.
1302 * @param HCPhys The physical page that is being dereferenced.
1303 * @param iPte Shadow PTE index
1304 */
1305DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte)
1306{
1307 PVM pVM = pVCpu->CTX_SUFF(pVM);
1308
1309 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1310 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1311
1312 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1313 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1314 * 2. write protect all shadowed pages. I.e. implement caching.
1315 */
1316 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1317
1318 /*
1319 * Find the guest address.
1320 */
1321 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1322 pRam;
1323 pRam = pRam->CTX_SUFF(pNext))
1324 {
1325 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1326 while (iPage-- > 0)
1327 {
1328 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1329 {
1330 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1331
1332 Assert(pShwPage->cPresent);
1333 Assert(pPool->cPresent);
1334 pShwPage->cPresent--;
1335 pPool->cPresent--;
1336
1337 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1338 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1339 return;
1340 }
1341 }
1342 }
1343
1344 for (;;)
1345 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1346}
1347
1348
1349/**
1350 * Update the tracking of shadowed pages.
1351 *
1352 * @param pVCpu The VMCPU handle.
1353 * @param pShwPage The shadow page.
1354 * @param u16 The top 16-bit of the pPage->HCPhys.
1355 * @param pPage Pointer to the guest page. This will be modified.
1356 * @param iPTDst The index into the shadow table.
1357 */
1358DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1359{
1360 PVM pVM = pVCpu->CTX_SUFF(pVM);
1361 /*
1362 * Just deal with the simple first time here.
1363 */
1364 if (!u16)
1365 {
1366 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1367 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1368 /* Save the page table index. */
1369 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1370 }
1371 else
1372 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1373
1374 /* write back */
1375 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1376 PGM_PAGE_SET_TRACKING(pPage, u16);
1377
1378 /* update statistics. */
1379 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1380 pShwPage->cPresent++;
1381 if (pShwPage->iFirstPresent > iPTDst)
1382 pShwPage->iFirstPresent = iPTDst;
1383}
1384
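/*
 * Note on the 16-bit tracking word handled above: PGMPOOL_TD_MAKE(1, pShwPage->idx)
 * packs a reference count together with the owning shadow pool page index into the
 * page's tracking field; additional references are recorded through the "phys ext"
 * path via pgmPoolTrackPhysExtAddref. The exact bit layout lives in PGMInternal.h.
 */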
1385
1386/**
1387 * Creates a 4K shadow page for a guest page.
1388 *
1389 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1390 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1391 * will be mapped in this function.
1392 *
1393 * @param pVCpu The VMCPU handle.
1394 * @param pPteDst Destination page table entry.
1395 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1396 * Can safely assume that only the flags are being used.
1397 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1398 * @param pShwPage Pointer to the shadow page.
1399 * @param iPTDst The index into the shadow table.
1400 *
1401 * @remark Not used for 2/4MB pages!
1402 */
1403DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1404{
1405 if (PteSrc.n.u1Present)
1406 {
1407 PVM pVM = pVCpu->CTX_SUFF(pVM);
1408
1409# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1410 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1411 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1412 if (pShwPage->fDirty)
1413 {
1414 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1415 PX86PTPAE pGstPT;
1416
1417 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1418 pGstPT->a[iPTDst].u = PteSrc.u;
1419 }
1420# endif
1421 /*
1422 * Find the ram range.
1423 */
1424 PPGMPAGE pPage;
1425 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1426 if (RT_SUCCESS(rc))
1427 {
1428 /* Ignore ballooned pages. Don't return errors or use a fatal assert here as part of a shadow sync range might include ballooned pages. */
1429 if (PGM_PAGE_IS_BALLOONED(pPage))
1430 return;
1431
1432#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1433 /* Try to make the page writable if necessary. */
1434 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1435 && ( PGM_PAGE_IS_ZERO(pPage)
1436 || ( PteSrc.n.u1Write
1437 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1438# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1439 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1440# endif
1441 )
1442 )
1443 )
1444 {
1445 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1446 AssertRC(rc);
1447 }
1448#endif
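        /*
         * (Roughly: without the new lazy page allocation, zero/shared pages are
         * replaced by a private allocation here - and write-monitored pages are
         * dealt with - so that the shadow PTE constructed below may map the
         * page writable.)
         */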
1449
1450 /** @todo investigate PWT, PCD and PAT. */
1451 /*
1452 * Make page table entry.
1453 */
1454 SHWPTE PteDst;
1455 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1456 {
1457 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1458 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1459 {
1460#if PGM_SHW_TYPE == PGM_TYPE_EPT
1461 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1462 PteDst.n.u1Present = 1;
1463 PteDst.n.u1Execute = 1;
1464 PteDst.n.u1IgnorePAT = 1;
1465 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1466 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1467#else
1468 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1469 | PGM_PAGE_GET_HCPHYS(pPage);
1470#endif
1471 }
1472 else
1473 {
1474 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1475 PteDst.u = 0;
1476 }
1477 /** @todo count these two kinds. */
1478 }
1479 else
1480 {
1481#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1482 /*
1483 * If the page or page directory entry is not marked accessed,
1484 * we mark the page not present.
1485 */
1486 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1487 {
1488 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1489 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1490 PteDst.u = 0;
1491 }
1492 else
1493 /*
1494 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1495 * when the page is modified.
1496 */
1497 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1498 {
1499 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1500 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1501 | PGM_PAGE_GET_HCPHYS(pPage)
1502 | PGM_PTFLAGS_TRACK_DIRTY;
1503 }
1504 else
1505#endif
1506 {
1507 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1508#if PGM_SHW_TYPE == PGM_TYPE_EPT
1509 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1510 PteDst.n.u1Present = 1;
1511 PteDst.n.u1Write = 1;
1512 PteDst.n.u1Execute = 1;
1513 PteDst.n.u1IgnorePAT = 1;
1514 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1515 /* PteDst.n.u1Size = 0 */
1516#else
1517 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1518 | PGM_PAGE_GET_HCPHYS(pPage);
1519#endif
1520 }
1521 }
1522
1523 /*
1524 * Make sure only allocated pages are mapped writable.
1525 */
1526 if ( PteDst.n.u1Write
1527 && PteDst.n.u1Present
1528 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1529 {
1530 /* Still applies to shared pages. */
1531 Assert(!PGM_PAGE_IS_ZERO(pPage));
1532 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1533 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1534 }
1535
1536 /*
1537 * Keep user track up to date.
1538 */
1539 if (PteDst.n.u1Present)
1540 {
1541 if (!pPteDst->n.u1Present)
1542 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1543 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1544 {
1545 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1546 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1547 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1548 }
1549 }
1550 else if (pPteDst->n.u1Present)
1551 {
1552 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1553 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1554 }
1555
1556 /*
1557 * Update statistics and commit the entry.
1558 */
1559#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1560 if (!PteSrc.n.u1Global)
1561 pShwPage->fSeenNonGlobal = true;
1562#endif
1563 ASMAtomicWriteSize(pPteDst, PteDst.u);
1564 }
1565 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1566 /** @todo count these. */
1567 }
1568 else
1569 {
1570 /*
1571 * Page not-present.
1572 */
1573 Log2(("SyncPageWorker: page not present in Pte\n"));
1574 /* Keep user track up to date. */
1575 if (pPteDst->n.u1Present)
1576 {
1577 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1578 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1579 }
1580 ASMAtomicWriteSize(pPteDst, 0);
1581 /** @todo count these. */
1582 }
1583}
1584
1585
1586/**
1587 * Syncs a guest OS page.
1588 *
1589 * There are no conflicts at this point, neither is there any need for
1590 * page table allocations.
1591 *
1592 * @returns VBox status code.
1593 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1594 * @param pVCpu The VMCPU handle.
1595 * @param PdeSrc Page directory entry of the guest.
1596 * @param GCPtrPage Guest context page address.
1597 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1598 * @param uErr Fault error (X86_TRAP_PF_*).
1599 */
1600PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1601{
1602 PVM pVM = pVCpu->CTX_SUFF(pVM);
1603 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1604 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1605
1606 Assert(PGMIsLockOwner(pVM));
1607
1608#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1609 || PGM_GST_TYPE == PGM_TYPE_PAE \
1610 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1611 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1612 && PGM_SHW_TYPE != PGM_TYPE_EPT
1613
1614 /*
1615 * Assert preconditions.
1616 */
1617 Assert(PdeSrc.n.u1Present);
1618 Assert(cPages);
1619# if 0 /* rarely useful; leave for debugging. */
1620 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1621# endif
1622
1623 /*
1624 * Get the shadow PDE, find the shadow page table in the pool.
1625 */
1626# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1627 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1628 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1629
1630 /* Fetch the pgm pool shadow descriptor. */
1631 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1632 Assert(pShwPde);
1633
1634# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1635 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1636 PPGMPOOLPAGE pShwPde = NULL;
1637 PX86PDPAE pPDDst;
1638
1639 /* Fetch the pgm pool shadow descriptor. */
1640 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1641 AssertRCSuccessReturn(rc2, rc2);
1642 Assert(pShwPde);
1643
1644 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1645 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1646
1647# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1648 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1649 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1650 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1651 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1652
1653 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1654 AssertRCSuccessReturn(rc2, rc2);
1655 Assert(pPDDst && pPdptDst);
1656 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1657# endif
1658 SHWPDE PdeDst = *pPdeDst;
1659
1660 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1661 if (!PdeDst.n.u1Present)
1662 {
1663 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1664 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1665 return VINF_SUCCESS; /* force the instruction to be executed again. */
1666 }
1667
1668 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1669 Assert(pShwPage);
1670
1671# if PGM_GST_TYPE == PGM_TYPE_AMD64
1672 /* Fetch the pgm pool shadow descriptor. */
1673 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1674 Assert(pShwPde);
1675# endif
1676
1677# if defined(IN_RC)
1678 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1679 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1680# endif
1681
1682 /*
1683 * Check that the page is present and that the shadow PDE isn't out of sync.
1684 */
1685# if PGM_GST_TYPE == PGM_TYPE_32BIT
1686 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1687# else
1688 const bool fBigPage = PdeSrc.b.u1Size;
1689# endif
1690 RTGCPHYS GCPhys;
1691 if (!fBigPage)
1692 {
1693 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1694# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1695 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1696 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1697# endif
1698 }
1699 else
1700 {
1701 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1702# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1703 /* Select the right PDE as we're emulating a 4MB page directory with two 2MB shadow PDEs. */
1704 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1705# endif
1706 }
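 /* E.g. a 32-bit guest 4MB page at physical 0x00c00000 shadowed with PAE is split into
  * two 2MB shadow PTs; bit 21 of GCPtrPage selects GCPhys 0x00c00000 or 0x00e00000 above. */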
1707 if ( pShwPage->GCPhys == GCPhys
1708 && PdeSrc.n.u1Present
1709 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1710 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1711# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1712 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1713# endif
1714 )
1715 {
1716 /*
1717 * Check that the PDE is marked accessed already.
1718 * Since we set the accessed bit *before* getting here on a #PF, this
1719 * check is only meant for dealing with non-#PF'ing paths.
1720 */
1721 if (PdeSrc.n.u1Accessed)
1722 {
1723 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1724 if (!fBigPage)
1725 {
1726 /*
1727 * 4KB Page - Map the guest page table.
1728 */
1729 PGSTPT pPTSrc;
1730 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1731 if (RT_SUCCESS(rc))
1732 {
1733# ifdef PGM_SYNC_N_PAGES
1734 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1735 if ( cPages > 1
1736 && !(uErr & X86_TRAP_PF_P)
1737 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1738 {
1739 /*
1740 * This code path is currently only taken when the caller is PGMTrap0eHandler
1741 * for non-present pages!
1742 *
1743 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1744 * deal with locality.
1745 */
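 /* The window is PGM_SYNC_NR_PAGES entries centred on the faulting entry (clamped at
  * the start and end of the page table), and only entries that are not yet present
  * are synced. */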
1746 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1747# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1748 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1749 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1750# else
1751 const unsigned offPTSrc = 0;
1752# endif
1753 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1754 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1755 iPTDst = 0;
1756 else
1757 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1758 for (; iPTDst < iPTDstEnd; iPTDst++)
1759 {
1760 if (!pPTDst->a[iPTDst].n.u1Present)
1761 {
1762 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1763 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1764 NOREF(GCPtrCurPage);
1765#ifndef IN_RING0
1766 /*
1767 * Assuming kernel code will be marked as supervisor (not as user level, even when
1768 * executed using a conforming code selector) and as read-only.
1769 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1770 */
1771 PPGMPAGE pPage;
1772 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1773 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1774 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1775 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1776 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1777 )
1778#endif /* else: CSAM not active */
1779 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1780 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1781 GCPtrCurPage, PteSrc.n.u1Present,
1782 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1783 PteSrc.n.u1User & PdeSrc.n.u1User,
1784 (uint64_t)PteSrc.u,
1785 (uint64_t)pPTDst->a[iPTDst].u,
1786 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1787 }
1788 }
1789 }
1790 else
1791# endif /* PGM_SYNC_N_PAGES */
1792 {
1793 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1794 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1795 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1796 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1797 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1798 GCPtrPage, PteSrc.n.u1Present,
1799 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1800 PteSrc.n.u1User & PdeSrc.n.u1User,
1801 (uint64_t)PteSrc.u,
1802 (uint64_t)pPTDst->a[iPTDst].u,
1803 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1804 }
1805 }
1806 else /* MMIO or invalid page: emulated in #PF handler. */
1807 {
1808 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1809 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1810 }
1811 }
1812 else
1813 {
1814 /*
1815 * 4/2MB page - lazy syncing shadow 4K pages.
1816 * (There are many causes of getting here, it's no longer only CSAM.)
1817 */
1818 /* Calculate the GC physical address of this 4KB shadow page. */
1819 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1820 /* Find ram range. */
1821 PPGMPAGE pPage;
1822 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1823 if (RT_SUCCESS(rc))
1824 {
1825 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1826
1827# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1828 /* Try to make the page writable if necessary. */
1829 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1830 && ( PGM_PAGE_IS_ZERO(pPage)
1831 || ( PdeSrc.n.u1Write
1832 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1833# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1834 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1835# endif
1836 )
1837 )
1838 )
1839 {
1840 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1841 AssertRC(rc);
1842 }
1843# endif
1844
1845 /*
1846 * Make shadow PTE entry.
1847 */
1848 SHWPTE PteDst;
1849 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1850 | PGM_PAGE_GET_HCPHYS(pPage);
1851 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1852 {
1853 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1854 PteDst.n.u1Write = 0;
1855 else
1856 PteDst.u = 0;
1857 }
1858
1859 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1860 if ( PteDst.n.u1Present
1861 && !pPTDst->a[iPTDst].n.u1Present)
1862 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1863
1864 /* Make sure only allocated pages are mapped writable. */
1865 if ( PteDst.n.u1Write
1866 && PteDst.n.u1Present
1867 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1868 {
1869 /* Still applies to shared pages. */
1870 Assert(!PGM_PAGE_IS_ZERO(pPage));
1871 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1872 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1873 }
1874
1875 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1876
1877 /*
1878 * If the page is not flagged as dirty and is writable, then make it read-only
1879 * at PD level, so we can set the dirty bit when the page is modified.
1880 *
1881 * ASSUMES that page access handlers are implemented on page table entry level.
1882 * Thus we will first catch the dirty access and set PDE.D and restart. If
1883 * there is an access handler, we'll trap again and let it work on the problem.
1884 */
1885 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1886 * As for invlpg, it simply frees the whole shadow PT.
1887 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1888 if ( !PdeSrc.b.u1Dirty
1889 && PdeSrc.b.u1Write)
1890 {
1891 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1892 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1893 PdeDst.n.u1Write = 0;
1894 }
1895 else
1896 {
1897 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1898 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1899 }
1900 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1901 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1902 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1903 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1904 }
1905 else
1906 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1907 }
1908# if defined(IN_RC)
1909 /* Release the dynamic mapping lock on pPdeDst taken above. */
1910 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1911# endif
1912 return VINF_SUCCESS;
1913 }
1914 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1915 }
1916 else
1917 {
1918 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1919 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1920 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1921 }
1922
1923 /*
1924 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1925 * Yea, I'm lazy.
1926 */
1927 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1928 ASMAtomicWriteSize(pPdeDst, 0);
1929
1930# if defined(IN_RC)
1931 /* Release the dynamic mapping lock on pPdeDst taken above. */
1932 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1933# endif
1934 PGM_INVL_VCPU_TLBS(pVCpu);
1935 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1936
1937#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1938 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1939 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1940 && !defined(IN_RC)
1941
1942# ifdef PGM_SYNC_N_PAGES
1943 /*
1944 * Get the shadow PDE, find the shadow page table in the pool.
1945 */
1946# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1947 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1948
1949# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1950 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1951
1952# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1953 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1954 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1955 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1956 X86PDEPAE PdeDst;
1957 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1958
1959 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1960 AssertRCSuccessReturn(rc, rc);
1961 Assert(pPDDst && pPdptDst);
1962 PdeDst = pPDDst->a[iPDDst];
1963# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1964 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1965 PEPTPD pPDDst;
1966 EPTPDE PdeDst;
1967
1968 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1969 if (rc != VINF_SUCCESS)
1970 {
1971 AssertRC(rc);
1972 return rc;
1973 }
1974 Assert(pPDDst);
1975 PdeDst = pPDDst->a[iPDDst];
1976# endif
1977 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1978 if (!PdeDst.n.u1Present)
1979 {
1980 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
1981 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1982 return VINF_SUCCESS; /* force the instruction to be executed again. */
1983 }
1984
1985 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
1986 if (PdeDst.n.u1Size)
1987 {
1988 Assert(HWACCMIsNestedPagingActive(pVM));
1989 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
1990 return VINF_SUCCESS;
1991 }
1992
1993 /* Mask away the page offset. */
1994 GCPtrPage &= ~((RTGCPTR)0xfff);
1995
1996 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1997 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1998
1999 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2000 if ( cPages > 1
2001 && !(uErr & X86_TRAP_PF_P)
2002 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2003 {
2004 /*
2005 * This code path is currently only taken when the caller is PGMTrap0eHandler
2006 * for non-present pages!
2007 *
2008 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2009 * deal with locality.
2010 */
2011 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2012 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2013 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2014 iPTDst = 0;
2015 else
2016 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2017 for (; iPTDst < iPTDstEnd; iPTDst++)
2018 {
2019 if (!pPTDst->a[iPTDst].n.u1Present)
2020 {
2021 GSTPTE PteSrc;
2022
2023 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2024
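 /* Real/protected mode without paging and nested paging map guest memory 1:1,
  * so the guest PTE can simply be faked from the page address. */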
2025 /* Fake the page table entry */
2026 PteSrc.u = GCPtrCurPage;
2027 PteSrc.n.u1Present = 1;
2028 PteSrc.n.u1Dirty = 1;
2029 PteSrc.n.u1Accessed = 1;
2030 PteSrc.n.u1Write = 1;
2031 PteSrc.n.u1User = 1;
2032
2033 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2034
2035 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2036 GCPtrCurPage, PteSrc.n.u1Present,
2037 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2038 PteSrc.n.u1User & PdeSrc.n.u1User,
2039 (uint64_t)PteSrc.u,
2040 (uint64_t)pPTDst->a[iPTDst].u,
2041 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2042
2043 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2044 break;
2045 }
2046 else
2047 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2048 }
2049 }
2050 else
2051# endif /* PGM_SYNC_N_PAGES */
2052 {
2053 GSTPTE PteSrc;
2054 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2055 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2056
2057 /* Fake the page table entry */
2058 PteSrc.u = GCPtrCurPage;
2059 PteSrc.n.u1Present = 1;
2060 PteSrc.n.u1Dirty = 1;
2061 PteSrc.n.u1Accessed = 1;
2062 PteSrc.n.u1Write = 1;
2063 PteSrc.n.u1User = 1;
2064 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2065
2066 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2067 GCPtrPage, PteSrc.n.u1Present,
2068 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2069 PteSrc.n.u1User & PdeSrc.n.u1User,
2070 (uint64_t)PteSrc.u,
2071 (uint64_t)pPTDst->a[iPTDst].u,
2072 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2073 }
2074 return VINF_SUCCESS;
2075
2076#else
2077 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2078 return VERR_INTERNAL_ERROR;
2079#endif
2080}
2081
2082
2083#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2084/**
2085 * Investigate page fault and handle write protection page faults caused by
2086 * dirty bit tracking.
2087 *
2088 * @returns VBox status code.
2089 * @param pVCpu The VMCPU handle.
2090 * @param uErr Page fault error code.
2091 * @param pPdeSrc Guest page directory entry.
2092 * @param GCPtrPage Guest context page address.
2093 */
2094PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2095{
2096 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2097 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2098 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2099# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2100 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2101# endif
2102 unsigned uPageFaultLevel;
2103 int rc;
2104 PVM pVM = pVCpu->CTX_SUFF(pVM);
2105
2106 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2107
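 /* Walk the guest structures top-down (PML4E/PDPE where applicable, then PDE and PTE).
  * Any level that denies the access is reported as a genuine guest fault; otherwise
  * only the accessed/dirty bits are updated here. */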
2108# if PGM_GST_TYPE == PGM_TYPE_PAE \
2109 || PGM_GST_TYPE == PGM_TYPE_AMD64
2110
2111# if PGM_GST_TYPE == PGM_TYPE_AMD64
2112 PX86PML4E pPml4eSrc;
2113 PX86PDPE pPdpeSrc;
2114
2115 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2116 Assert(pPml4eSrc);
2117
2118 /*
2119 * Real page fault? (PML4E level)
2120 */
2121 if ( (uErr & X86_TRAP_PF_RSVD)
2122 || !pPml4eSrc->n.u1Present
2123 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2124 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2125 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2126 )
2127 {
2128 uPageFaultLevel = 0;
2129 goto l_UpperLevelPageFault;
2130 }
2131 Assert(pPdpeSrc);
2132
2133# else /* PAE */
2134 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2135# endif /* PAE */
2136
2137 /*
2138 * Real page fault? (PDPE level)
2139 */
2140 if ( (uErr & X86_TRAP_PF_RSVD)
2141 || !pPdpeSrc->n.u1Present
2142# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2143 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2144 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2145 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2146# endif
2147 )
2148 {
2149 uPageFaultLevel = 1;
2150 goto l_UpperLevelPageFault;
2151 }
2152# endif
2153
2154 /*
2155 * Real page fault? (PDE level)
2156 */
2157 if ( (uErr & X86_TRAP_PF_RSVD)
2158 || !pPdeSrc->n.u1Present
2159 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2160# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2161 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2162# endif
2163 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2164 {
2165 uPageFaultLevel = 2;
2166 goto l_UpperLevelPageFault;
2167 }
2168
2169 /*
2170 * First check the easy case where the page directory has been marked read-only to track
2171 * the dirty bit of an emulated BIG page
2172 */
2173 if ( pPdeSrc->b.u1Size
2174# if PGM_GST_TYPE == PGM_TYPE_32BIT
2175 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2176# endif
2177 )
2178 {
2179 /* Mark guest page directory as accessed */
2180# if PGM_GST_TYPE == PGM_TYPE_AMD64
2181 pPml4eSrc->n.u1Accessed = 1;
2182 pPdpeSrc->lm.u1Accessed = 1;
2183# endif
2184 pPdeSrc->b.u1Accessed = 1;
2185
2186 /*
2187 * Only write protection page faults are relevant here.
2188 */
2189 if (fWriteFault)
2190 {
2191 /* Mark guest page directory as dirty (BIG page only). */
2192 pPdeSrc->b.u1Dirty = 1;
2193 }
2194 return VINF_SUCCESS;
2195 }
2196 /* else: 4KB page table */
2197
2198 /*
2199 * Map the guest page table.
2200 */
2201 PGSTPT pPTSrc;
2202 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2203 if (RT_SUCCESS(rc))
2204 {
2205 /*
2206 * Real page fault?
2207 */
2208 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2209 const GSTPTE PteSrc = *pPteSrc;
2210 if ( !PteSrc.n.u1Present
2211 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2212# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2213 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2214# endif
2215 || (fUserLevelFault && !PteSrc.n.u1User)
2216 )
2217 {
2218 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2219 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2220
2221 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2222 * See the 2nd case above as well.
2223 */
2224 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2225 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2226
2227 return VINF_EM_RAW_GUEST_TRAP;
2228 }
2229 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2230
2231 /*
2232 * Set the accessed bits in the page directory and the page table.
2233 */
2234# if PGM_GST_TYPE == PGM_TYPE_AMD64
2235 pPml4eSrc->n.u1Accessed = 1;
2236 pPdpeSrc->lm.u1Accessed = 1;
2237# endif
2238 pPdeSrc->n.u1Accessed = 1;
2239 pPteSrc->n.u1Accessed = 1;
2240
2241 /*
2242 * Only write protection page faults are relevant here.
2243 */
2244 if (fWriteFault)
2245 {
2246 /* Write access, so mark guest entry as dirty. */
2247# ifdef VBOX_WITH_STATISTICS
2248 if (!pPteSrc->n.u1Dirty)
2249 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2250 else
2251 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2252# endif
2253
2254 pPteSrc->n.u1Dirty = 1;
2255 }
2256 return VINF_SUCCESS;
2257 }
2258 AssertRC(rc);
2259 return rc;
2260
2261
2262l_UpperLevelPageFault:
2263 /*
2264 * Pagefault detected while checking the PML4E, PDPE or PDE.
2265 * Single exit handler to get rid of duplicate code paths.
2266 */
2267 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2268 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2269
2270 if ( 1
2271# if PGM_GST_TYPE == PGM_TYPE_AMD64
2272 && pPml4eSrc->n.u1Present
2273# endif
2274# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2275 && pPdpeSrc->n.u1Present
2276# endif
2277 && pPdeSrc->n.u1Present)
2278 {
2279 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2280 if ( pPdeSrc->b.u1Size
2281# if PGM_GST_TYPE == PGM_TYPE_32BIT
2282 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2283# endif
2284 )
2285 {
2286 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2287 }
2288 else
2289 {
2290 /*
2291 * Map the guest page table.
2292 */
2293 PGSTPT pPTSrc2;
2294 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2295 if (RT_SUCCESS(rc))
2296 {
2297 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2298 if (pPteSrc->n.u1Present)
2299 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2300 }
2301 AssertRC(rc);
2302 }
2303 }
2304 return VINF_EM_RAW_GUEST_TRAP;
2305}
2306
2307/**
2308 * Handle dirty bit tracking faults.
2309 *
2310 * @returns VBox status code.
2311 * @param pVCpu The VMCPU handle.
2312 * @param uErr Page fault error code.
2313 * @param pPdeDst Shadow page directory entry.
2314 * @param pPdeSrc Guest page directory entry.
2315 * @param GCPtrPage Guest context page address.
2316 */
2317PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2318{
2319# if PGM_GST_TYPE == PGM_TYPE_32BIT
2320 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2321# else
2322 const bool fBigPagesSupported = true;
2323# endif
2324 PVM pVM = pVCpu->CTX_SUFF(pVM);
2325 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2326
2327 Assert(PGMIsLockOwner(pVM));
2328
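 /* Big pages are handled via the PDE-level TRACK_DIRTY flag first; 4KB mappings fall
  * through to the PTE-level handling further down. */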
2329 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2330 {
2331 if ( pPdeDst->n.u1Present
2332 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2333 {
2334 SHWPDE PdeDst = *pPdeDst;
2335
2336 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2337 Assert(pPdeSrc->b.u1Write);
2338
2339 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2340 * fault again and take this path to only invalidate the entry.
2341 */
2342 PdeDst.n.u1Write = 1;
2343 PdeDst.n.u1Accessed = 1;
2344 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2345 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2346 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2347 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2348 }
2349# ifdef IN_RING0
2350 else
2351 /* Check for stale TLB entry; only applies to the SMP guest case. */
2352 if ( pVM->cCpus > 1
2353 && pPdeDst->n.u1Write
2354 && pPdeDst->n.u1Accessed)
2355 {
2356 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2357 if (pShwPage)
2358 {
2359 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2360 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2361 if ( pPteDst->n.u1Present
2362 && pPteDst->n.u1Write)
2363 {
2364 /* Stale TLB entry. */
2365 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2366 PGM_INVL_PG(pVCpu, GCPtrPage);
2367 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2368 }
2369 }
2370 }
2371# endif /* IN_RING0 */
2372 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2373 }
2374
2375 /*
2376 * Map the guest page table.
2377 */
2378 PGSTPT pPTSrc;
2379 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2380 if (RT_SUCCESS(rc))
2381 {
2382 if (pPdeDst->n.u1Present)
2383 {
2384 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2385 const GSTPTE PteSrc = *pPteSrc;
2386#ifndef IN_RING0
2387 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2388 * Our individual shadow handlers will provide more information and force a fatal exit.
2389 */
2390 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2391 {
2392 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2393 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2394 }
2395#endif
2396 /*
2397 * Map shadow page table.
2398 */
2399 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2400 if (pShwPage)
2401 {
2402 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2403 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2404 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2405 {
2406 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2407 {
2408 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2409 SHWPTE PteDst = *pPteDst;
2410
2411 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2412 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2413
2414 Assert(pPteSrc->n.u1Write);
2415
2416 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2417 * fault again and take this path to only invalidate the entry.
2418 */
2419 if (RT_LIKELY(pPage))
2420 {
2421 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2422 {
2423 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2424 PteDst.n.u1Write = 0;
2425 }
2426 else
2427 {
2428 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2429 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2430 {
2431 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2432 AssertRC(rc);
2433 }
2434 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2435 {
2436 PteDst.n.u1Write = 1;
2437 }
2438 else
2439 {
2440 /* Still applies to shared pages. */
2441 Assert(!PGM_PAGE_IS_ZERO(pPage));
2442 PteDst.n.u1Write = 0;
2443 }
2444 }
2445 }
2446 else
2447 PteDst.n.u1Write = 1;
2448
2449 PteDst.n.u1Dirty = 1;
2450 PteDst.n.u1Accessed = 1;
2451 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2452 ASMAtomicWriteSize(pPteDst, PteDst.u);
2453 PGM_INVL_PG(pVCpu, GCPtrPage);
2454 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2455 }
2456# ifdef IN_RING0
2457 else
2458 /* Check for stale TLB entry; only applies to the SMP guest case. */
2459 if ( pVM->cCpus > 1
2460 && pPteDst->n.u1Write == 1
2461 && pPteDst->n.u1Accessed == 1)
2462 {
2463 /* Stale TLB entry. */
2464 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2465 PGM_INVL_PG(pVCpu, GCPtrPage);
2466 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2467 }
2468# endif
2469 }
2470 }
2471 else
2472 AssertMsgFailed(("pgmPoolGetPage %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2473 }
2474 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2475 }
2476 AssertRC(rc);
2477 return rc;
2478}
2479#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2480
2481
2482/**
2483 * Sync a shadow page table.
2484 *
2485 * The shadow page table is not present. This includes the case where
2486 * there is a conflict with a mapping.
2487 *
2488 * @returns VBox status code.
2489 * @param pVCpu The VMCPU handle.
2490 * @param iPDSrc Page directory index.
2491 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2492 * Assume this is a temporary mapping.
2493 * @param GCPtrPage GC Pointer of the page that caused the fault
2494 */
2495PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2496{
2497 PVM pVM = pVCpu->CTX_SUFF(pVM);
2498 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2499
2500 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2501#if 0 /* rarely useful; leave for debugging. */
2502 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2503#endif
2504 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2505
2506 Assert(PGMIsLocked(pVM));
2507
2508#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2509 || PGM_GST_TYPE == PGM_TYPE_PAE \
2510 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2511 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2512 && PGM_SHW_TYPE != PGM_TYPE_EPT
2513
2514 int rc = VINF_SUCCESS;
2515
2516 /*
2517 * Validate input a little bit.
2518 */
2519 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2520# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2521 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2522 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2523
2524 /* Fetch the pgm pool shadow descriptor. */
2525 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2526 Assert(pShwPde);
2527
2528# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2529 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2530 PPGMPOOLPAGE pShwPde = NULL;
2531 PX86PDPAE pPDDst;
2532 PSHWPDE pPdeDst;
2533
2534 /* Fetch the pgm pool shadow descriptor. */
2535 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2536 AssertRCSuccessReturn(rc, rc);
2537 Assert(pShwPde);
2538
2539 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2540 pPdeDst = &pPDDst->a[iPDDst];
2541
2542# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2543 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2544 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2545 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2546 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2547 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2548 AssertRCSuccessReturn(rc, rc);
2549 Assert(pPDDst);
2550 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2551# endif
2552 SHWPDE PdeDst = *pPdeDst;
2553
2554# if PGM_GST_TYPE == PGM_TYPE_AMD64
2555 /* Fetch the pgm pool shadow descriptor. */
2556 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2557 Assert(pShwPde);
2558# endif
2559
2560# ifndef PGM_WITHOUT_MAPPINGS
2561 /*
2562 * Check for conflicts.
2563 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2564 * HC: Simply resolve the conflict.
2565 */
2566 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2567 {
2568 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2569# ifndef IN_RING3
2570 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2571 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2572 return VERR_ADDRESS_CONFLICT;
2573# else
2574 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2575 Assert(pMapping);
2576# if PGM_GST_TYPE == PGM_TYPE_32BIT
2577 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2578# elif PGM_GST_TYPE == PGM_TYPE_PAE
2579 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2580# else
2581 AssertFailed(); /* can't happen for amd64 */
2582# endif
2583 if (RT_FAILURE(rc))
2584 {
2585 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2586 return rc;
2587 }
2588 PdeDst = *pPdeDst;
2589# endif
2590 }
2591# endif /* !PGM_WITHOUT_MAPPINGS */
2592 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2593
2594# if defined(IN_RC)
2595 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2596 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2597# endif
2598
2599 /*
2600 * Sync page directory entry.
2601 */
2602 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2603 if (PdeSrc.n.u1Present)
2604 {
2605 /*
2606 * Allocate & map the page table.
2607 */
2608 PSHWPT pPTDst;
2609# if PGM_GST_TYPE == PGM_TYPE_32BIT
2610 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2611# else
2612 const bool fPageTable = !PdeSrc.b.u1Size;
2613# endif
2614 PPGMPOOLPAGE pShwPage;
2615 RTGCPHYS GCPhys;
2616 if (fPageTable)
2617 {
2618 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2619# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2620 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2621 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2622# endif
2623 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2624 }
2625 else
2626 {
2627 PGMPOOLACCESS enmAccess;
2628# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2629 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2630# else
2631 const bool fNoExecute = false;
2632# endif
2633
2634 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2635# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2636 /* Select the right PDE as we're emulating a 4MB page directory with two 2MB shadow PDEs. */
2637 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2638# endif
2639 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2640 if (PdeSrc.n.u1User)
2641 {
2642 if (PdeSrc.n.u1Write)
2643 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2644 else
2645 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2646 }
2647 else
2648 {
2649 if (PdeSrc.n.u1Write)
2650 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2651 else
2652 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2653 }
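 /* (The access kind is part of the pool cache identity, so a cached shadow PT created
  * with different permissions for the same physical range is not reused here.) */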
2654 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2655 }
2656 if (rc == VINF_SUCCESS)
2657 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2658 else if (rc == VINF_PGM_CACHED_PAGE)
2659 {
2660 /*
2661 * The PT was cached, just hook it up.
2662 */
2663 if (fPageTable)
2664 PdeDst.u = pShwPage->Core.Key
2665 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2666 else
2667 {
2668 PdeDst.u = pShwPage->Core.Key
2669 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2670 /* (see explanation and assumptions further down.) */
2671 if ( !PdeSrc.b.u1Dirty
2672 && PdeSrc.b.u1Write)
2673 {
2674 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2675 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2676 PdeDst.b.u1Write = 0;
2677 }
2678 }
2679 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2680# if defined(IN_RC)
2681 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2682# endif
2683 return VINF_SUCCESS;
2684 }
2685 else if (rc == VERR_PGM_POOL_FLUSHED)
2686 {
2687 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2688# if defined(IN_RC)
2689 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2690# endif
2691 return VINF_PGM_SYNC_CR3;
2692 }
2693 else
2694 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2695 PdeDst.u &= X86_PDE_AVL_MASK;
2696 PdeDst.u |= pShwPage->Core.Key;
2697
2698 /*
2699 * Page directory has been accessed (this is a fault situation, remember).
2700 */
2701 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2702 if (fPageTable)
2703 {
2704 /*
2705 * Page table - 4KB.
2706 *
2707 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2708 */
2709 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2710 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2711 PGSTPT pPTSrc;
2712 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2713 if (RT_SUCCESS(rc))
2714 {
2715 /*
2716 * Start by syncing the page directory entry so CSAM's TLB trick works.
2717 */
2718 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2719 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2720 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2721# if defined(IN_RC)
2722 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2723# endif
2724
2725 /*
2726 * Directory/page user or supervisor privilege: (same goes for read/write)
2727 *
2728 * Directory Page Combined
2729 * U/S U/S U/S
2730 * 0 0 0
2731 * 0 1 0
2732 * 1 0 0
2733 * 1 1 1
2734 *
2735 * Simple AND operation. Table listed for completeness.
2736 *
2737 */
2738 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2739# ifdef PGM_SYNC_N_PAGES
2740 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2741 unsigned iPTDst = iPTBase;
2742 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2743 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2744 iPTDst = 0;
2745 else
2746 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2747# else /* !PGM_SYNC_N_PAGES */
2748 unsigned iPTDst = 0;
2749 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2750# endif /* !PGM_SYNC_N_PAGES */
2751# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2752 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2753 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2754# else
2755 const unsigned offPTSrc = 0;
2756# endif
2757 for (; iPTDst < iPTDstEnd; iPTDst++)
2758 {
2759 const unsigned iPTSrc = iPTDst + offPTSrc;
2760 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2761
2762 if (PteSrc.n.u1Present) /* we've already cleared it above */
2763 {
2764# ifndef IN_RING0
2765 /*
2766 * Assuming kernel code will be marked as supervisor (not as user level, even when
2767 * executed using a conforming code selector) and as read-only.
2768 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2769 */
2770 PPGMPAGE pPage;
2771 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2772 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2773 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2774 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2775 )
2776# endif
2777 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2778 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2779 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2780 PteSrc.n.u1Present,
2781 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2782 PteSrc.n.u1User & PdeSrc.n.u1User,
2783 (uint64_t)PteSrc.u,
2784 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2785 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2786 }
2787 } /* for PTEs */
2788 }
2789 }
2790 else
2791 {
2792 /*
2793 * Big page - 2/4MB.
2794 *
2795 * We'll walk the ram range list in parallel and optimize lookups.
2796 * We will only sync one shadow page table at a time.
2797 */
2798 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2799
2800 /**
2801 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2802 */
2803
2804 /*
2805 * Start by syncing the page directory entry.
2806 */
2807 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2808 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2809
2810 /*
2811 * If the page is not flagged as dirty and is writable, then make it read-only
2812 * at PD level, so we can set the dirty bit when the page is modified.
2813 *
2814 * ASSUMES that page access handlers are implemented on page table entry level.
2815 * Thus we will first catch the dirty access and set PDE.D and restart. If
2816 * there is an access handler, we'll trap again and let it work on the problem.
2817 */
2818 /** @todo move the above stuff to a section in the PGM documentation. */
2819 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2820 if ( !PdeSrc.b.u1Dirty
2821 && PdeSrc.b.u1Write)
2822 {
2823 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2824 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2825 PdeDst.b.u1Write = 0;
2826 }
2827 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2828# if defined(IN_RC)
2829 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2830# endif
2831
2832 /*
2833 * Fill the shadow page table.
2834 */
2835 /* Get address and flags from the source PDE. */
2836 SHWPTE PteDstBase;
2837 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2838
2839 /* Loop thru the entries in the shadow PT. */
2840 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2841 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2842 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2843 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
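 /* Walk the shadow PT and the RAM range list in lock-step: GCPhys advances one page per
  * PTE and pRam is advanced whenever GCPhys moves past the current range; gaps between
  * ranges are filled with not-present entries. */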
2844 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2845 unsigned iPTDst = 0;
2846 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2847 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2848 {
2849 /* Advance ram range list. */
2850 while (pRam && GCPhys > pRam->GCPhysLast)
2851 pRam = pRam->CTX_SUFF(pNext);
2852 if (pRam && GCPhys >= pRam->GCPhys)
2853 {
2854 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2855 do
2856 {
2857 /* Make shadow PTE. */
2858 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2859 SHWPTE PteDst;
2860
2861# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2862 /* Try to make the page writable if necessary. */
2863 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2864 && ( PGM_PAGE_IS_ZERO(pPage)
2865 || ( PteDstBase.n.u1Write
2866 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2867# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2868 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2869# endif
2870 && !PGM_PAGE_IS_BALLOONED(pPage))
2871 )
2872 )
2873 {
2874 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2875 AssertRCReturn(rc, rc);
2876 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2877 break;
2878 }
2879# endif
2880
2881 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2882 {
2883 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2884 {
2885 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2886 PteDst.n.u1Write = 0;
2887 }
2888 else
2889 PteDst.u = 0;
2890 }
2891 else
2892 if (PGM_PAGE_IS_BALLOONED(pPage))
2893 {
2894 /* Skip ballooned pages. */
2895 PteDst.u = 0;
2896 }
2897# ifndef IN_RING0
2898 /*
2899 * Assuming kernel code will be marked as supervisor and not as user level and executed
2900 * using a conforming code selector. Don't check for readonly, as that implies the whole
2901 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2902 */
2903 else if ( !PdeSrc.n.u1User
2904 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2905 PteDst.u = 0;
2906# endif
2907 else
2908 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2909
2910 /* Only map writable pages writable. */
2911 if ( PteDst.n.u1Write
2912 && PteDst.n.u1Present
2913 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2914 {
2915 /* Still applies to shared pages. */
2916 Assert(!PGM_PAGE_IS_ZERO(pPage));
2917 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2918 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2919 }
2920
2921 if (PteDst.n.u1Present)
2922 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2923
2924 /* commit it */
2925 pPTDst->a[iPTDst] = PteDst;
2926 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2927 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2928 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2929
2930 /* advance */
2931 GCPhys += PAGE_SIZE;
2932 iHCPage++;
2933 iPTDst++;
2934 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2935 && GCPhys <= pRam->GCPhysLast);
2936 }
2937 else if (pRam)
2938 {
2939 Log(("Invalid pages at %RGp\n", GCPhys));
2940 do
2941 {
2942 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2943 GCPhys += PAGE_SIZE;
2944 iPTDst++;
2945 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2946 && GCPhys < pRam->GCPhys);
2947 }
2948 else
2949 {
2950 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2951 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2952 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2953 }
2954 } /* while more PTEs */
2955 } /* 4KB / 4MB */
2956 }
2957 else
2958 AssertRelease(!PdeDst.n.u1Present);
2959
2960 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2961 if (RT_FAILURE(rc))
2962 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2963 return rc;
2964
2965#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2966 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2967 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2968 && !defined(IN_RC)
2969
2970 /*
2971 * Validate input a little bit.
2972 */
2973 int rc = VINF_SUCCESS;
2974# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2975 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2976 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2977
2978 /* Fetch the pgm pool shadow descriptor. */
2979 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2980 Assert(pShwPde);
2981
2982# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2983 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2984 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
2985 PX86PDPAE pPDDst;
2986 PSHWPDE pPdeDst;
2987
2988 /* Fetch the pgm pool shadow descriptor. */
2989 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2990 AssertRCSuccessReturn(rc, rc);
2991 Assert(pShwPde);
2992
2993 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2994 pPdeDst = &pPDDst->a[iPDDst];
2995
2996# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2997 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2998 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2999 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3000 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3001 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3002 AssertRCSuccessReturn(rc, rc);
3003 Assert(pPDDst);
3004 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3005
3006 /* Fetch the pgm pool shadow descriptor. */
3007 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3008 Assert(pShwPde);
3009
3010# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3011 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3012 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3013 PEPTPD pPDDst;
3014 PEPTPDPT pPdptDst;
3015
3016 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3017 if (rc != VINF_SUCCESS)
3018 {
3019 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3020 AssertRC(rc);
3021 return rc;
3022 }
3023 Assert(pPDDst);
3024 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3025
3026 /* Fetch the pgm pool shadow descriptor. */
3027 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3028 Assert(pShwPde);
3029# endif
3030 SHWPDE PdeDst = *pPdeDst;
3031
3032 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3033 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3034
3035# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
3036# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
3037 if (HWACCMIsNestedPagingActive(pVM))
3038# endif
3039 {
3040 PPGMPAGE pPage;
3041
3042 /* Check if we allocated a big page before for this 2 MB range. */
3043 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3044 if (RT_SUCCESS(rc))
3045 {
3046 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3047
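 /* Three possibilities: reuse an existing large page mapping, re-validate a previously
  * disabled one, or (if large pages are enabled) try to allocate a fresh 2MB page for
  * this range. */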
3048 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3049 {
3050 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3051 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3052 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3053 }
3054 else
3055 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3056 {
3057 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3058 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3059 if (RT_SUCCESS(rc))
3060 {
3061 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3062 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3063 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3064 }
3065 }
3066 else
3067 if (PGMIsUsingLargePages(pVM))
3068 {
3069 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3070 if (RT_SUCCESS(rc))
3071 {
3072 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3073 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3074 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3075 }
3076 else
3077 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3078 }
3079
3080 if (HCPhys != NIL_RTHCPHYS)
3081 {
3082 PdeDst.u &= X86_PDE_AVL_MASK;
3083 PdeDst.u |= HCPhys;
3084 PdeDst.n.u1Present = 1;
3085 PdeDst.n.u1Write = 1;
3086 PdeDst.b.u1Size = 1;
3087# if PGM_SHW_TYPE == PGM_TYPE_EPT
3088 PdeDst.n.u1Execute = 1;
3089 PdeDst.b.u1IgnorePAT = 1;
3090 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3091# else
3092 PdeDst.n.u1User = 1;
3093# endif
3094 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3095
3096 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3097 /* Add a reference to the first page only. */
3098 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3099
3100 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3101 return VINF_SUCCESS;
3102 }
3103 }
3104 }
3105# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3106
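    /* There is no guest paging at this point (real/protected mode guest or nested paging), so fake a
       fully permissive guest PDE and let SyncPage populate the shadow PTEs below. */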
3107 GSTPDE PdeSrc;
3108 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3109 PdeSrc.n.u1Present = 1;
3110 PdeSrc.n.u1Write = 1;
3111 PdeSrc.n.u1Accessed = 1;
3112 PdeSrc.n.u1User = 1;
3113
3114 /*
3115 * Allocate & map the page table.
3116 */
3117 PSHWPT pPTDst;
3118 PPGMPOOLPAGE pShwPage;
3119 RTGCPHYS GCPhys;
3120
3121 /* Virtual address = physical address */
3122 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3123 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3124
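    /* The pool either hands back a freshly allocated page table or a cached one (VINF_PGM_CACHED_PAGE);
       any other status is unexpected at this point. */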
3125 if ( rc == VINF_SUCCESS
3126 || rc == VINF_PGM_CACHED_PAGE)
3127 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3128 else
3129 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3130
3131 PdeDst.u &= X86_PDE_AVL_MASK;
3132 PdeDst.u |= pShwPage->Core.Key;
3133 PdeDst.n.u1Present = 1;
3134 PdeDst.n.u1Write = 1;
3135# if PGM_SHW_TYPE == PGM_TYPE_EPT
3136 PdeDst.n.u1Execute = 1;
3137# else
3138 PdeDst.n.u1User = 1;
3139 PdeDst.n.u1Accessed = 1;
3140# endif
3141 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3142
3143 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3144 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3145 return rc;
3146
3147#else
3148 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3149 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3150 return VERR_INTERNAL_ERROR;
3151#endif
3152}
3153
3154
3155
3156/**
3157 * Prefetch a page/set of pages.
3158 *
3159 * Typically used to sync commonly used pages before entering raw mode
3160 * after a CR3 reload.
3161 *
3162 * @returns VBox status code.
3163 * @param pVCpu The VMCPU handle.
3164 * @param GCPtrPage Page to prefetch.
3165 */
3166PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3167{
3168#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3169 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3170 /*
3171 * Check that all Guest levels thru the PDE are present, getting the
3172 * PD and PDE in the process.
3173 */
3174 int rc = VINF_SUCCESS;
3175# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3176# if PGM_GST_TYPE == PGM_TYPE_32BIT
3177 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3178 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3179# elif PGM_GST_TYPE == PGM_TYPE_PAE
3180 unsigned iPDSrc;
3181 X86PDPE PdpeSrc;
3182 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3183 if (!pPDSrc)
3184 return VINF_SUCCESS; /* not present */
3185# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3186 unsigned iPDSrc;
3187 PX86PML4E pPml4eSrc;
3188 X86PDPE PdpeSrc;
3189 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3190 if (!pPDSrc)
3191 return VINF_SUCCESS; /* not present */
3192# endif
3193 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3194# else
3195 PGSTPD pPDSrc = NULL;
3196 const unsigned iPDSrc = 0;
3197 GSTPDE PdeSrc;
3198
3199 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3200 PdeSrc.n.u1Present = 1;
3201 PdeSrc.n.u1Write = 1;
3202 PdeSrc.n.u1Accessed = 1;
3203 PdeSrc.n.u1User = 1;
3204# endif
3205
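    /* Only prefetch when the guest PDE is both present and already accessed; everything else is left
       to the normal page fault path. */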
3206 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3207 {
3208 PVM pVM = pVCpu->CTX_SUFF(pVM);
3209 pgmLock(pVM);
3210
3211# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3212 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3213# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3214 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3215 PX86PDPAE pPDDst;
3216 X86PDEPAE PdeDst;
3217# if PGM_GST_TYPE != PGM_TYPE_PAE
3218 X86PDPE PdpeSrc;
3219
3220 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3221 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3222# endif
3223 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3224 if (rc != VINF_SUCCESS)
3225 {
3226 pgmUnlock(pVM);
3227 AssertRC(rc);
3228 return rc;
3229 }
3230 Assert(pPDDst);
3231 PdeDst = pPDDst->a[iPDDst];
3232
3233# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3234 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3235 PX86PDPAE pPDDst;
3236 X86PDEPAE PdeDst;
3237
3238# if PGM_GST_TYPE == PGM_TYPE_PROT
3239 /* AMD-V nested paging */
3240 X86PML4E Pml4eSrc;
3241 X86PDPE PdpeSrc;
3242 PX86PML4E pPml4eSrc = &Pml4eSrc;
3243
3244 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3245 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3246 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3247# endif
3248
3249 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3250 if (rc != VINF_SUCCESS)
3251 {
3252 pgmUnlock(pVM);
3253 AssertRC(rc);
3254 return rc;
3255 }
3256 Assert(pPDDst);
3257 PdeDst = pPDDst->a[iPDDst];
3258# endif
3259 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3260 {
3261 if (!PdeDst.n.u1Present)
3262 {
3263 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3264 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3265 }
3266 else
3267 {
3268 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3269 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3270 * makes no sense to prefetch more than one page.
3271 */
3272 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3273 if (RT_SUCCESS(rc))
3274 rc = VINF_SUCCESS;
3275 }
3276 }
3277 pgmUnlock(pVM);
3278 }
3279 return rc;
3280
3281#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3282 return VINF_SUCCESS; /* ignore */
3283#endif
3284}
3285
3286
3287
3288
3289/**
3290 * Syncs a page during a PGMVerifyAccess() call.
3291 *
3292 * @returns VBox status code (informational included).
3293 * @param pVCpu The VMCPU handle.
3294 * @param GCPtrPage The address of the page to sync.
3295 * @param fPage The effective guest page flags.
3296 * @param uErr The trap error code.
3297 */
3298PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3299{
3300 PVM pVM = pVCpu->CTX_SUFF(pVM);
3301
3302 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3303
3304 Assert(!HWACCMIsNestedPagingActive(pVM));
3305#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3306 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3307
3308# ifndef IN_RING0
3309 if (!(fPage & X86_PTE_US))
3310 {
3311 /*
3312 * Mark this page as safe.
3313 */
3314 /** @todo not correct for pages that contain both code and data!! */
3315 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3316 CSAMMarkPage(pVM, GCPtrPage, true);
3317 }
3318# endif
3319
3320 /*
3321 * Get guest PD and index.
3322 */
3323# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3324# if PGM_GST_TYPE == PGM_TYPE_32BIT
3325 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3326 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3327# elif PGM_GST_TYPE == PGM_TYPE_PAE
3328 unsigned iPDSrc = 0;
3329 X86PDPE PdpeSrc;
3330 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3331
3332     if (!pPDSrc)
3333 {
3334 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3335 return VINF_EM_RAW_GUEST_TRAP;
3336 }
3337# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3338 unsigned iPDSrc;
3339 PX86PML4E pPml4eSrc;
3340 X86PDPE PdpeSrc;
3341 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3342 if (!pPDSrc)
3343 {
3344 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3345 return VINF_EM_RAW_GUEST_TRAP;
3346 }
3347# endif
3348# else
3349 PGSTPD pPDSrc = NULL;
3350 const unsigned iPDSrc = 0;
3351# endif
3352 int rc = VINF_SUCCESS;
3353
3354 pgmLock(pVM);
3355
3356 /*
3357 * First check if the shadow pd is present.
3358 */
3359# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3360 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3361# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3362 PX86PDEPAE pPdeDst;
3363 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3364 PX86PDPAE pPDDst;
3365# if PGM_GST_TYPE != PGM_TYPE_PAE
3366 X86PDPE PdpeSrc;
3367
3368 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3369 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3370# endif
3371 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3372 if (rc != VINF_SUCCESS)
3373 {
3374 pgmUnlock(pVM);
3375 AssertRC(rc);
3376 return rc;
3377 }
3378 Assert(pPDDst);
3379 pPdeDst = &pPDDst->a[iPDDst];
3380
3381# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3382 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3383 PX86PDPAE pPDDst;
3384 PX86PDEPAE pPdeDst;
3385
3386# if PGM_GST_TYPE == PGM_TYPE_PROT
3387 /* AMD-V nested paging */
3388 X86PML4E Pml4eSrc;
3389 X86PDPE PdpeSrc;
3390 PX86PML4E pPml4eSrc = &Pml4eSrc;
3391
3392 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3393 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3394 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3395# endif
3396
3397 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3398 if (rc != VINF_SUCCESS)
3399 {
3400 pgmUnlock(pVM);
3401 AssertRC(rc);
3402 return rc;
3403 }
3404 Assert(pPDDst);
3405 pPdeDst = &pPDDst->a[iPDDst];
3406# endif
3407
3408# if defined(IN_RC)
3409 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3410 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3411# endif
3412
3413 if (!pPdeDst->n.u1Present)
3414 {
3415 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3416 if (rc != VINF_SUCCESS)
3417 {
3418# if defined(IN_RC)
3419 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3420 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3421# endif
3422 pgmUnlock(pVM);
3423 AssertRC(rc);
3424 return rc;
3425 }
3426 }
3427
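    /* The shadow PDE is present now; first see whether this was merely a dirty/accessed bit fault,
       otherwise resync the single page. */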
3428# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3429 /* Check for dirty bit fault */
3430 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3431 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3432 Log(("PGMVerifyAccess: success (dirty)\n"));
3433 else
3434 {
3435 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3436# else
3437 {
3438 GSTPDE PdeSrc;
3439 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3440 PdeSrc.n.u1Present = 1;
3441 PdeSrc.n.u1Write = 1;
3442 PdeSrc.n.u1Accessed = 1;
3443 PdeSrc.n.u1User = 1;
3444
3445# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3446 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3447 if (uErr & X86_TRAP_PF_US)
3448 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3449 else /* supervisor */
3450 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3451
3452 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3453 if (RT_SUCCESS(rc))
3454 {
3455 /* Page was successfully synced */
3456 Log2(("PGMVerifyAccess: success (sync)\n"));
3457 rc = VINF_SUCCESS;
3458 }
3459 else
3460 {
3461 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3462 rc = VINF_EM_RAW_GUEST_TRAP;
3463 }
3464 }
3465# if defined(IN_RC)
3466 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3467 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3468# endif
3469 pgmUnlock(pVM);
3470 return rc;
3471
3472#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3473
3474     AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3475 return VERR_INTERNAL_ERROR;
3476#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3477}
3478
3479
3480/**
3481 * Syncs the paging hierarchy starting at CR3.
3482 *
3483 * @returns VBox status code, no specials.
3484 * @param pVCpu The VMCPU handle.
3485 * @param cr0 Guest context CR0 register
3486 * @param cr3 Guest context CR3 register
3487 * @param cr4 Guest context CR4 register
3488 * @param fGlobal Including global page directories or not
3489 */
3490PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3491{
3492 PVM pVM = pVCpu->CTX_SUFF(pVM);
3493
3494 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3495
3496#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3497
3498 pgmLock(pVM);
3499
3500# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3501 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3502 if (pPool->cDirtyPages)
3503 pgmPoolResetDirtyPages(pVM);
3504# endif
3505
3506 /*
3507 * Update page access handlers.
3508 * The virtual handlers are always flushed, while the physical handlers are flushed only on demand.
3509 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3510 * have to look into that later because it will have a bad influence on performance.
3511 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3512 * bird: Yes, but that won't work for aliases.
3513 */
3514 /** @todo this MUST go away. See #1557. */
3515 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3516 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3517 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3518 pgmUnlock(pVM);
3519#endif /* !NESTED && !EPT */
3520
3521#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3522 /*
3523 * Nested / EPT - almost no work.
3524 */
3525 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3526 return VINF_SUCCESS;
3527
3528#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3529 /*
3530 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3531 * out the shadow parts when the guest modifies its tables.
3532 */
3533 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3534 return VINF_SUCCESS;
3535
3536#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3537
3538# ifndef PGM_WITHOUT_MAPPINGS
3539 /*
3540 * Check for and resolve conflicts with our guest mappings if they
3541 * are enabled and not fixed.
3542 */
3543 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3544 {
3545 int rc = pgmMapResolveConflicts(pVM);
3546 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3547 if (rc == VINF_PGM_SYNC_CR3)
3548 {
3549 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3550 return VINF_PGM_SYNC_CR3;
3551 }
3552 }
3553# else
3554 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3555# endif
3556 return VINF_SUCCESS;
3557#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3558}
3559
3560
3561
3562
3563#ifdef VBOX_STRICT
3564#ifdef IN_RC
3565# undef AssertMsgFailed
3566# define AssertMsgFailed Log
3567#endif
3568#ifdef IN_RING3
3569# include <VBox/dbgf.h>
3570
3571/**
3572 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3573 *
3574 * @returns VBox status code (VINF_SUCCESS).
3575 * @param cr3 The root of the hierarchy.
3576 * @param cr4 The cr4; only the PAE and PSE flags are currently used.
3577 * @param fLongMode Set if long mode, false if not long mode.
3578 * @param cMaxDepth Number of levels to dump.
3579 * @param pHlp Pointer to the output functions.
3580 */
3581RT_C_DECLS_BEGIN
3582VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3583RT_C_DECLS_END
3584
3585#endif
3586
3587/**
3588 * Checks that the shadow page table is in sync with the guest one.
3589 *
3590 * @returns The number of errors.
3591 * @param pVM The virtual machine.
3592 * @param pVCpu The VMCPU handle.
3593 * @param cr3 Guest context CR3 register
3594 * @param cr4 Guest context CR4 register
3595 * @param GCPtr Where to start. Defaults to 0.
3596 * @param cb How much to check. Defaults to everything.
3597 */
3598PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3599{
3600#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3601 return 0;
3602#else
3603 unsigned cErrors = 0;
3604 PVM pVM = pVCpu->CTX_SUFF(pVM);
3605 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3606
3607#if PGM_GST_TYPE == PGM_TYPE_PAE
3608 /** @todo currently broken; crashes below somewhere */
3609 AssertFailed();
3610#endif
3611
3612#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3613 || PGM_GST_TYPE == PGM_TYPE_PAE \
3614 || PGM_GST_TYPE == PGM_TYPE_AMD64
3615
3616# if PGM_GST_TYPE == PGM_TYPE_32BIT
3617 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3618# else
3619 bool fBigPagesSupported = true;
3620# endif
3621 PPGMCPU pPGM = &pVCpu->pgm.s;
3622 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3623 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3624# ifndef IN_RING0
3625 RTHCPHYS HCPhys; /* general usage. */
3626# endif
3627 int rc;
3628
3629 /*
3630 * Check that the Guest CR3 and all its mappings are correct.
3631 */
3632 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3633 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3634 false);
3635# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3636# if PGM_GST_TYPE == PGM_TYPE_32BIT
3637 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3638# else
3639 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3640# endif
3641 AssertRCReturn(rc, 1);
3642 HCPhys = NIL_RTHCPHYS;
3643 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3644     AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3645# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3646 pgmGstGet32bitPDPtr(pPGM);
3647 RTGCPHYS GCPhys;
3648 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3649 AssertRCReturn(rc, 1);
3650 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3651# endif
3652# endif /* !IN_RING0 */
3653
3654 /*
3655 * Get and check the Shadow CR3.
3656 */
3657# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3658 unsigned cPDEs = X86_PG_ENTRIES;
3659 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3660# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3661# if PGM_GST_TYPE == PGM_TYPE_32BIT
3662 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3663# else
3664 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3665# endif
3666 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3667# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3668 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3669 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3670# endif
3671 if (cb != ~(RTGCPTR)0)
3672 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3673
3674/** @todo call the other two PGMAssert*() functions. */
3675
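    /* Walk the shadow hierarchy top down (PML4E -> PDPTE -> PDE -> PTE, as far as the current mode
       has levels) and compare every present entry against the corresponding guest entry, counting
       each mismatch in cErrors. */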
3676# if PGM_GST_TYPE == PGM_TYPE_AMD64
3677 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3678
3679 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3680 {
3681 PPGMPOOLPAGE pShwPdpt = NULL;
3682 PX86PML4E pPml4eSrc;
3683 PX86PML4E pPml4eDst;
3684 RTGCPHYS GCPhysPdptSrc;
3685
3686 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3687 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3688
3689 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3690 if (!pPml4eDst->n.u1Present)
3691 {
3692 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3693 continue;
3694 }
3695
3696 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3697 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3698
3699 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3700 {
3701 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3702 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3703 cErrors++;
3704 continue;
3705 }
3706
3707 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3708 {
3709 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3710 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3711 cErrors++;
3712 continue;
3713 }
3714
3715 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3716 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3717 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3718 {
3719 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3720 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3721 cErrors++;
3722 continue;
3723 }
3724# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3725 {
3726# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3727
3728# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3729 /*
3730 * Check the PDPTEs too.
3731 */
3732 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3733
3734 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3735 {
3736 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3737 PPGMPOOLPAGE pShwPde = NULL;
3738 PX86PDPE pPdpeDst;
3739 RTGCPHYS GCPhysPdeSrc;
3740# if PGM_GST_TYPE == PGM_TYPE_PAE
3741 X86PDPE PdpeSrc;
3742 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3743 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3744# else
3745 PX86PML4E pPml4eSrcIgn;
3746 X86PDPE PdpeSrc;
3747 PX86PDPT pPdptDst;
3748 PX86PDPAE pPDDst;
3749 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3750
3751 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3752 if (rc != VINF_SUCCESS)
3753 {
3754 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3755 GCPtr += 512 * _2M;
3756 continue; /* next PDPTE */
3757 }
3758 Assert(pPDDst);
3759# endif
3760 Assert(iPDSrc == 0);
3761
3762 pPdpeDst = &pPdptDst->a[iPdpt];
3763
3764 if (!pPdpeDst->n.u1Present)
3765 {
3766 GCPtr += 512 * _2M;
3767 continue; /* next PDPTE */
3768 }
3769
3770 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3771 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3772
3773 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3774 {
3775 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3776 GCPtr += 512 * _2M;
3777 cErrors++;
3778 continue;
3779 }
3780
3781 if (GCPhysPdeSrc != pShwPde->GCPhys)
3782 {
3783# if PGM_GST_TYPE == PGM_TYPE_AMD64
3784 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3785# else
3786 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3787# endif
3788 GCPtr += 512 * _2M;
3789 cErrors++;
3790 continue;
3791 }
3792
3793# if PGM_GST_TYPE == PGM_TYPE_AMD64
3794 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3795 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3796 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3797 {
3798 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3799 GCPtr += 512 * _2M;
3800 cErrors++;
3801 continue;
3802 }
3803# endif
3804
3805# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3806 {
3807# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3808# if PGM_GST_TYPE == PGM_TYPE_32BIT
3809 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3810# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3811 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3812# endif
3813# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3814 /*
3815 * Iterate the shadow page directory.
3816 */
3817 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3818 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3819
3820 for (;
3821 iPDDst < cPDEs;
3822 iPDDst++, GCPtr += cIncrement)
3823 {
3824# if PGM_SHW_TYPE == PGM_TYPE_PAE
3825 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3826# else
3827 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3828# endif
3829 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3830 {
3831 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3832 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3833 {
3834 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3835 cErrors++;
3836 continue;
3837 }
3838 }
3839 else if ( (PdeDst.u & X86_PDE_P)
3840 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3841 )
3842 {
3843 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3844 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3845 if (!pPoolPage)
3846 {
3847 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3848 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3849 cErrors++;
3850 continue;
3851 }
3852 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3853
3854 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3855 {
3856 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3857 GCPtr, (uint64_t)PdeDst.u));
3858 cErrors++;
3859 }
3860
3861 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3862 {
3863 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3864 GCPtr, (uint64_t)PdeDst.u));
3865 cErrors++;
3866 }
3867
3868 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3869 if (!PdeSrc.n.u1Present)
3870 {
3871 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3872 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3873 cErrors++;
3874 continue;
3875 }
3876
3877 if ( !PdeSrc.b.u1Size
3878 || !fBigPagesSupported)
3879 {
3880 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3881# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3882 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3883# endif
3884 }
3885 else
3886 {
3887# if PGM_GST_TYPE == PGM_TYPE_32BIT
3888 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3889 {
3890 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3891 GCPtr, (uint64_t)PdeSrc.u));
3892 cErrors++;
3893 continue;
3894 }
3895# endif
3896 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3897# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3898 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3899# endif
3900 }
3901
3902 if ( pPoolPage->enmKind
3903 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3904 {
3905 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3906 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3907 cErrors++;
3908 }
3909
3910 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3911 if (!pPhysPage)
3912 {
3913 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3914 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3915 cErrors++;
3916 continue;
3917 }
3918
3919 if (GCPhysGst != pPoolPage->GCPhys)
3920 {
3921 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3922 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3923 cErrors++;
3924 continue;
3925 }
3926
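                /* Distinguish a guest 4 KB page table from a big (2/4 MB) page mapping; the shadow
                   consistency checks differ for the two cases. */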
3927 if ( !PdeSrc.b.u1Size
3928 || !fBigPagesSupported)
3929 {
3930 /*
3931 * Page Table.
3932 */
3933 const GSTPT *pPTSrc;
3934 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3935 if (RT_FAILURE(rc))
3936 {
3937 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3938 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3939 cErrors++;
3940 continue;
3941 }
3942 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3943 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3944 {
3945 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3946 // (This problem will go away when/if we shadow multiple CR3s.)
3947 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3948 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3949 cErrors++;
3950 continue;
3951 }
3952 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3953 {
3954 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3955 GCPtr, (uint64_t)PdeDst.u));
3956 cErrors++;
3957 continue;
3958 }
3959
3960 /* iterate the page table. */
3961# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3962 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3963 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3964# else
3965 const unsigned offPTSrc = 0;
3966# endif
3967 for (unsigned iPT = 0, off = 0;
3968 iPT < RT_ELEMENTS(pPTDst->a);
3969 iPT++, off += PAGE_SIZE)
3970 {
3971 const SHWPTE PteDst = pPTDst->a[iPT];
3972
3973 /* skip not-present entries. */
3974 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3975 continue;
3976 Assert(PteDst.n.u1Present);
3977
3978 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3979 if (!PteSrc.n.u1Present)
3980 {
3981# ifdef IN_RING3
3982 PGMAssertHandlerAndFlagsInSync(pVM);
3983 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3984# endif
3985 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3986 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3987 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3988 cErrors++;
3989 continue;
3990 }
3991
3992 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3993# if 1 /** @todo sync accessed bit properly... */
3994 fIgnoreFlags |= X86_PTE_A;
3995# endif
3996
3997 /* match the physical addresses */
3998 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3999 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4000
4001# ifdef IN_RING3
4002 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4003 if (RT_FAILURE(rc))
4004 {
4005 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4006 {
4007 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4008 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4009 cErrors++;
4010 continue;
4011 }
4012 }
4013 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4014 {
4015 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4016 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4017 cErrors++;
4018 continue;
4019 }
4020# endif
4021
4022 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4023 if (!pPhysPage)
4024 {
4025# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4026 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4027 {
4028 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4029 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4030 cErrors++;
4031 continue;
4032 }
4033# endif
4034 if (PteDst.n.u1Write)
4035 {
4036 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4037 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4038 cErrors++;
4039 }
4040 fIgnoreFlags |= X86_PTE_RW;
4041 }
4042 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4043 {
4044 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4045 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4046 cErrors++;
4047 continue;
4048 }
4049
4050 /* flags */
4051 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4052 {
4053 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4054 {
4055 if (PteDst.n.u1Write)
4056 {
4057 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4058 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4059 cErrors++;
4060 continue;
4061 }
4062 fIgnoreFlags |= X86_PTE_RW;
4063 }
4064 else
4065 {
4066 if (PteDst.n.u1Present)
4067 {
4068 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4069 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4070 cErrors++;
4071 continue;
4072 }
4073 fIgnoreFlags |= X86_PTE_P;
4074 }
4075 }
4076 else
4077 {
4078 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4079 {
4080 if (PteDst.n.u1Write)
4081 {
4082 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4083 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4084 cErrors++;
4085 continue;
4086 }
4087 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4088 {
4089 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4090 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4091 cErrors++;
4092 continue;
4093 }
4094 if (PteDst.n.u1Dirty)
4095 {
4096 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4097 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4098 cErrors++;
4099 }
4100# if 0 /** @todo sync access bit properly... */
4101 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4102 {
4103 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4104 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4105 cErrors++;
4106 }
4107 fIgnoreFlags |= X86_PTE_RW;
4108# else
4109 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4110# endif
4111 }
4112 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4113 {
4114 /* access bit emulation (not implemented). */
4115 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4116 {
4117 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4118 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4119 cErrors++;
4120 continue;
4121 }
4122 if (!PteDst.n.u1Accessed)
4123 {
4124 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4125 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4126 cErrors++;
4127 }
4128 fIgnoreFlags |= X86_PTE_P;
4129 }
4130# ifdef DEBUG_sandervl
4131 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4132# endif
4133 }
4134
4135 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4136 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4137 )
4138 {
4139 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4140 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4141 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4142 cErrors++;
4143 continue;
4144 }
4145 } /* foreach PTE */
4146 }
4147 else
4148 {
4149 /*
4150 * Big Page.
4151 */
4152 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4153 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4154 {
4155 if (PdeDst.n.u1Write)
4156 {
4157 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4158 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4159 cErrors++;
4160 continue;
4161 }
4162 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4163 {
4164 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4165 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4166 cErrors++;
4167 continue;
4168 }
4169# if 0 /** @todo sync access bit properly... */
4170 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4171 {
4172 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4173 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4174 cErrors++;
4175 }
4176 fIgnoreFlags |= X86_PTE_RW;
4177# else
4178 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4179# endif
4180 }
4181 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4182 {
4183 /* access bit emulation (not implemented). */
4184 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4185 {
4186 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4187 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4188 cErrors++;
4189 continue;
4190 }
4191 if (!PdeDst.n.u1Accessed)
4192 {
4193 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4194 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4195 cErrors++;
4196 }
4197 fIgnoreFlags |= X86_PTE_P;
4198 }
4199
4200 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4201 {
4202 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4203 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4204 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4205 cErrors++;
4206 }
4207
4208 /* iterate the page table. */
4209 for (unsigned iPT = 0, off = 0;
4210 iPT < RT_ELEMENTS(pPTDst->a);
4211 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4212 {
4213 const SHWPTE PteDst = pPTDst->a[iPT];
4214
4215 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4216 {
4217 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4218 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4219 cErrors++;
4220 }
4221
4222 /* skip not-present entries. */
4223 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4224 continue;
4225
4226 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4227
4228 /* match the physical addresses */
4229 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4230
4231# ifdef IN_RING3
4232 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4233 if (RT_FAILURE(rc))
4234 {
4235 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4236 {
4237 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4238 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4239 cErrors++;
4240 }
4241 }
4242 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4243 {
4244 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4245 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4246 cErrors++;
4247 continue;
4248 }
4249# endif
4250 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4251 if (!pPhysPage)
4252 {
4253# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4254 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4255 {
4256 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4257 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4258 cErrors++;
4259 continue;
4260 }
4261# endif
4262 if (PteDst.n.u1Write)
4263 {
4264 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4265 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4266 cErrors++;
4267 }
4268 fIgnoreFlags |= X86_PTE_RW;
4269 }
4270 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4271 {
4272 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4273 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4274 cErrors++;
4275 continue;
4276 }
4277
4278 /* flags */
4279 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4280 {
4281 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4282 {
4283 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4284 {
4285 if (PteDst.n.u1Write)
4286 {
4287 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4288 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4289 cErrors++;
4290 continue;
4291 }
4292 fIgnoreFlags |= X86_PTE_RW;
4293 }
4294 }
4295 else
4296 {
4297 if (PteDst.n.u1Present)
4298 {
4299 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4300 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4301 cErrors++;
4302 continue;
4303 }
4304 fIgnoreFlags |= X86_PTE_P;
4305 }
4306 }
4307
4308 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4309 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4310 )
4311 {
4312 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4313 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4314 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4315 cErrors++;
4316 continue;
4317 }
4318 } /* for each PTE */
4319 }
4320 }
4321 /* not present */
4322
4323 } /* for each PDE */
4324
4325 } /* for each PDPTE */
4326
4327 } /* for each PML4E */
4328
4329# ifdef DEBUG
4330 if (cErrors)
4331 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4332# endif
4333
4334#endif /* GST == 32BIT, PAE or AMD64 */
4335 return cErrors;
4336
4337#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4338}
4339#endif /* VBOX_STRICT */
4340
4341
4342/**
4343 * Sets up the CR3 for shadow paging
4344 *
4345 * @returns Strict VBox status code.
4346 * @retval VINF_SUCCESS.
4347 *
4348 * @param pVCpu The VMCPU handle.
4349 * @param GCPhysCR3 The physical address in the CR3 register.
4350 */
4351PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4352{
4353 PVM pVM = pVCpu->CTX_SUFF(pVM);
4354
4355 /* Update guest paging info. */
4356#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4357 || PGM_GST_TYPE == PGM_TYPE_PAE \
4358 || PGM_GST_TYPE == PGM_TYPE_AMD64
4359
4360 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4361
4362 /*
4363 * Map the page CR3 points at.
4364 */
4365 RTHCPTR HCPtrGuestCR3;
4366 RTHCPHYS HCPhysGuestCR3;
4367 pgmLock(pVM);
4368 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4369 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4370 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4371 /** @todo this needs some reworking wrt. locking. */
4372# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4373 HCPtrGuestCR3 = NIL_RTHCPTR;
4374 int rc = VINF_SUCCESS;
4375# else
4376 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4377# endif
4378 pgmUnlock(pVM);
4379 if (RT_SUCCESS(rc))
4380 {
4381 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4382 if (RT_SUCCESS(rc))
4383 {
4384# ifdef IN_RC
4385 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4386# endif
4387# if PGM_GST_TYPE == PGM_TYPE_32BIT
4388 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4389# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4390 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4391# endif
4392 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4393
4394# elif PGM_GST_TYPE == PGM_TYPE_PAE
4395 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4396 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4397# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4398 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4399# endif
4400 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4401 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4402
4403 /*
4404 * Map the 4 PDs too.
4405 */
4406 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4407 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4408 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4409 {
4410 if (pGuestPDPT->a[i].n.u1Present)
4411 {
4412 RTHCPTR HCPtr;
4413 RTHCPHYS HCPhys;
4414 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4415 pgmLock(pVM);
4416 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4417 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4418 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4419# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4420 HCPtr = NIL_RTHCPTR;
4421 int rc2 = VINF_SUCCESS;
4422# else
4423 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4424# endif
4425 pgmUnlock(pVM);
4426 if (RT_SUCCESS(rc2))
4427 {
4428 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4429 AssertRCReturn(rc, rc);
4430
4431 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4432# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4433 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4434# endif
4435 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4436 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4437# ifdef IN_RC
4438 PGM_INVL_PG(pVCpu, GCPtr);
4439# endif
4440 continue;
4441 }
4442 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4443 }
4444
4445 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4446# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4447 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4448# endif
4449 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4450 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4451# ifdef IN_RC
4452 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4453# endif
4454 }
4455
4456# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4457 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4458# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4459 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4460# endif
4461# endif
4462 }
4463 else
4464 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4465 }
4466 else
4467 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4468
4469#else /* prot/real stub */
4470 int rc = VINF_SUCCESS;
4471#endif
4472
4473 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4474# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4475 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4476 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4477 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4478 && PGM_GST_TYPE != PGM_TYPE_PROT))
4479
4480 Assert(!HWACCMIsNestedPagingActive(pVM));
4481
4482 /*
4483 * Update the shadow root page as well since that's not fixed.
4484 */
4485 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4486 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4487 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4488 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4489 PPGMPOOLPAGE pNewShwPageCR3;
4490
4491 pgmLock(pVM);
4492
4493# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4494 if (pPool->cDirtyPages)
4495 pgmPoolResetDirtyPages(pVM);
4496# endif
4497
4498 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
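    /* Allocate (or find in the pool cache) the shadow root page for the new guest CR3 and keep it
       locked so the pool cannot flush it while it is the active root. */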
4499 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4500 AssertFatalRC(rc);
4501 rc = VINF_SUCCESS;
4502
4503# ifdef IN_RC
4504 /*
4505 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4506 * state will be inconsistent! Flush important things now while
4507 * we still can and then make sure there are no ring-3 calls.
4508 */
4509 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4510 VMMRZCallRing3Disable(pVCpu);
4511# endif
4512
4513 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4514 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4515 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4516# ifdef IN_RING0
4517 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4518 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4519# elif defined(IN_RC)
4520 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4521 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4522# else
4523 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4524 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4525# endif
4526
4527# ifndef PGM_WITHOUT_MAPPINGS
4528 /*
4529 * Apply all hypervisor mappings to the new CR3.
4530 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4531 * make sure we check for conflicts in the new CR3 root.
4532 */
4533# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4534 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4535# endif
4536 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4537 AssertRCReturn(rc, rc);
4538# endif
4539
4540 /* Set the current hypervisor CR3. */
4541 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4542 SELMShadowCR3Changed(pVM, pVCpu);
4543
4544# ifdef IN_RC
4545 /* NOTE: The state is consistent again. */
4546 VMMRZCallRing3Enable(pVCpu);
4547# endif
4548
4549 /* Clean up the old CR3 root. */
4550 if ( pOldShwPageCR3
4551 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4552 {
4553 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4554# ifndef PGM_WITHOUT_MAPPINGS
4555 /* Remove the hypervisor mappings from the shadow page table. */
4556 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4557# endif
4558 /* Mark the page as unlocked; allow flushing again. */
4559 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4560
4561 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4562 }
4563 pgmUnlock(pVM);
4564# endif
4565
4566 return rc;
4567}
4568
4569/**
4570 * Unmaps the shadow CR3.
4571 *
4572 * @returns VBox status, no specials.
4573 * @param pVCpu The VMCPU handle.
4574 */
4575PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4576{
4577 LogFlow(("UnmapCR3\n"));
4578
4579 int rc = VINF_SUCCESS;
4580 PVM pVM = pVCpu->CTX_SUFF(pVM);
4581
4582 /*
4583 * Update guest paging info.
4584 */
4585#if PGM_GST_TYPE == PGM_TYPE_32BIT
4586 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4587# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4588 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4589# endif
4590 pVCpu->pgm.s.pGst32BitPdRC = 0;
4591
4592#elif PGM_GST_TYPE == PGM_TYPE_PAE
4593 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4594# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4595 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4596# endif
4597 pVCpu->pgm.s.pGstPaePdptRC = 0;
4598 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4599 {
4600 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4601# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4602 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4603# endif
4604 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4605 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4606 }
4607
4608#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4609 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4610# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4611 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4612# endif
4613
4614#else /* prot/real mode stub */
4615 /* nothing to do */
4616#endif
4617
4618#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4619 /*
4620 * Update shadow paging info.
4621 */
4622# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4623 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4624 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4625
4626# if PGM_GST_TYPE != PGM_TYPE_REAL
4627 Assert(!HWACCMIsNestedPagingActive(pVM));
4628# endif
4629
4630 pgmLock(pVM);
4631
4632# ifndef PGM_WITHOUT_MAPPINGS
4633 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4634 /* Remove the hypervisor mappings from the shadow page table. */
4635 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4636# endif
4637
4638 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4639 {
4640 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4641
4642 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4643
4644# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4645 if (pPool->cDirtyPages)
4646 pgmPoolResetDirtyPages(pVM);
4647# endif
4648
4649 /* Mark the page as unlocked; allow flushing again. */
4650 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4651
4652 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4653 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4654 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4655 pVCpu->pgm.s.pShwPageCR3RC = 0;
4656 pVCpu->pgm.s.iShwUser = 0;
4657 pVCpu->pgm.s.iShwUserTable = 0;
4658 }
4659 pgmUnlock(pVM);
4660# endif
4661#endif /* !IN_RC*/
4662
4663 return rc;
4664}