VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@28090

Last change on this file since 28090 was 28090, checked in by vboxsync, 15 years ago

Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 200.5 KB
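
For readers following the uErr checks in Trap0eHandler below: the X86_TRAP_PF_* constants tested throughout the handler are assumed here to mirror the architectural x86 #PF error-code bits. A minimal, self-contained decoder (plain C, independent of the VBox headers; names are illustrative only) might look like this:

#include <stdio.h>

/* Architectural x86 page-fault error-code bits (Intel SDM / AMD APM). */
#define PF_P    (1u << 0)  /* 0 = page not present, 1 = protection violation */
#define PF_RW   (1u << 1)  /* 0 = read access,      1 = write access */
#define PF_US   (1u << 2)  /* 0 = supervisor mode,  1 = user mode */
#define PF_RSVD (1u << 3)  /* reserved bit set in a paging-structure entry */
#define PF_ID   (1u << 4)  /* instruction fetch (NX/SMEP-capable CPUs) */

static void DecodePfErrorCode(unsigned uErr)
{
    printf("#PF uErr=%#x: %s, %s access, %s mode%s%s\n",
           uErr,
           (uErr & PF_P)    ? "protection violation" : "page not present",
           (uErr & PF_RW)   ? "write" : "read",
           (uErr & PF_US)   ? "user" : "supervisor",
           (uErr & PF_RSVD) ? ", reserved bit set" : "",
           (uErr & PF_ID)   ? ", instruction fetch" : "");
}

int main(void)
{
    /* The conflict check added in this revision fires only when both P and RW
       are set, before it inspects the shadow PDE for PGM_PDFLAGS_MAPPING. */
    DecodePfErrorCode(PF_P | PF_RW);
    return 0;
}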
1/* $Id: PGMAllBth.h 28090 2010-04-08 13:29:25Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
34PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
35PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
37#ifdef VBOX_STRICT
38PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
39#endif
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
42PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
43RT_C_DECLS_END
44
45
46/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
47#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
48# error "Invalid combination; PAE guest implies PAE shadow"
49#endif
50
51#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
52 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
53# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
54#endif
55
56#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
57 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
58# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
59#endif
60
61#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
62 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
63# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
64#endif
65
66
67#ifndef IN_RING3
68/**
69 * #PF Handler for raw-mode guest execution.
70 *
71 * @returns VBox status code (appropriate for trap handling and GC return).
72 *
73 * @param pVCpu VMCPU Handle.
74 * @param uErr The trap error code.
75 * @param pRegFrame Trap register frame.
76 * @param pvFault The fault address.
77 * @param pfLockTaken PGM lock taken here or not (out)
78 */
79PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
80{
81 PVM pVM = pVCpu->CTX_SUFF(pVM);
82
83 *pfLockTaken = false;
84
85# if defined(IN_RC) && defined(VBOX_STRICT)
86 PGMDynCheckLocks(pVM);
87# endif
88
89# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
90 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
91 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
92
93# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
94 /*
95 * Hide the instruction fetch trap indicator for now.
96 */
97 /** @todo NXE will change this and we must fix NXE in the switcher too! */
98 if (uErr & X86_TRAP_PF_ID)
99 {
100 uErr &= ~X86_TRAP_PF_ID;
101 TRPMSetErrorCode(pVCpu, uErr);
102 }
103# endif
104
105 /*
106 * Get PDs.
107 */
108 int rc;
109# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
110# if PGM_GST_TYPE == PGM_TYPE_32BIT
111 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
112 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
113
114# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
115
116# if PGM_GST_TYPE == PGM_TYPE_PAE
117 unsigned iPDSrc = 0; /* initialized to shut up gcc */
118 X86PDPE PdpeSrc;
119 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
120
121# elif PGM_GST_TYPE == PGM_TYPE_AMD64
122 unsigned iPDSrc = 0; /* initialized to shut up gcc */
123 PX86PML4E pPml4eSrc;
124 X86PDPE PdpeSrc;
125 PGSTPD pPDSrc;
126
127 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
128 Assert(pPml4eSrc);
129# endif
130
131 /* Quick check for a valid guest trap. (PAE & AMD64) */
132 if (!pPDSrc)
133 {
134# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
135 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
136# else
137 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# endif
139 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
140 TRPMSetErrorCode(pVCpu, uErr);
141 return VINF_EM_RAW_GUEST_TRAP;
142 }
143# endif
144
145# else /* !PGM_WITH_PAGING */
146 PGSTPD pPDSrc = NULL;
147 const unsigned iPDSrc = 0;
148# endif /* !PGM_WITH_PAGING */
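/* For reference, the iPDSrc computations above follow the architectural
 * linear-address layouts rather than anything VBox specific:
 *   - 32-bit paging:  bits 31-22 = PD index, 21-12 = PT index, 11-0 = page offset
 *     (hence the plain 'pvFault >> GST_PD_SHIFT').
 *   - PAE paging:     bits 31-30 = PDPT index, 29-21 = PD index, 20-12 = PT index;
 *     pgmGstGetPaePDPtr therefore also hands back the PDPT entry (PdpeSrc) used
 *     further down.
 *   - Long mode:      adds a PML4 level (bits 47-39) on top of the PAE layout,
 *     resolved via pgmGstGetLongModePDPtr.
 * The !pPDSrc quick check applies only to PAE/AMD64, where an upper-level
 * entry may legitimately be absent. */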
149
150# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
151 /*
152 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
153 * where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.
154 */
155 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW))
156 {
157 pgmLock(pVM);
158# if PGM_SHW_TYPE == PGM_TYPE_32BIT
159 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
160 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
161# else /* PGM_SHW_TYPE == PGM_TYPE_PAE */
162 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
163
164 PX86PDPAE pPDDst;
165# if PGM_GST_TYPE != PGM_TYPE_PAE
166 X86PDPE PdpeSrc;
167
168 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
169 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
170# endif
171 int rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
172 if (rc != VINF_SUCCESS)
173 {
174 pgmUnlock(pVM);
175 AssertRC(rc);
176 return rc;
177 }
178 Assert(pPDDst);
179# endif
180 if (pPDDst->a[iPDDst].u & PGM_PDFLAGS_MAPPING)
181 {
182 pgmUnlock(pVM);
183 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
184 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
185 return VINF_EM_RAW_EMULATE_INSTR;
186 }
187 pgmUnlock(pVM);
188 }
189# endif
190
191 /* First check for a genuine guest page fault. */
192# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
193 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
194 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
195 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
196 if (rc == VINF_EM_RAW_GUEST_TRAP)
197 {
198 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
199 = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
200 return rc;
201 }
202# endif /* PGM_WITH_PAGING */
203
204 /* Take the big lock now. */
205 *pfLockTaken = true;
206 pgmLock(pVM);
207
208 /* Fetch the guest PDE */
209# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
210 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
211# else
212 GSTPDE PdeSrc;
213 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
214 PdeSrc.n.u1Present = 1;
215 PdeSrc.n.u1Write = 1;
216 PdeSrc.n.u1Accessed = 1;
217 PdeSrc.n.u1User = 1;
218# endif
219
220# if PGM_SHW_TYPE == PGM_TYPE_32BIT
221 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
222 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
223
224# elif PGM_SHW_TYPE == PGM_TYPE_PAE
225 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
226
227 PX86PDPAE pPDDst;
228# if PGM_GST_TYPE != PGM_TYPE_PAE
229 X86PDPE PdpeSrc;
230
231 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
232 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
233# endif
234 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
235 if (rc != VINF_SUCCESS)
236 {
237 AssertRC(rc);
238 return rc;
239 }
240 Assert(pPDDst);
241
242# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
243 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
244 PX86PDPAE pPDDst;
245# if PGM_GST_TYPE == PGM_TYPE_PROT
246 /* AMD-V nested paging */
247 X86PML4E Pml4eSrc;
248 X86PDPE PdpeSrc;
249 PX86PML4E pPml4eSrc = &Pml4eSrc;
250
251 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
252 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
253 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
254# endif
255
256 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
257 if (rc != VINF_SUCCESS)
258 {
259 AssertRC(rc);
260 return rc;
261 }
262 Assert(pPDDst);
263
264# elif PGM_SHW_TYPE == PGM_TYPE_EPT
265 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
266 PEPTPD pPDDst;
267
268 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
269 if (rc != VINF_SUCCESS)
270 {
271 AssertRC(rc);
272 return rc;
273 }
274 Assert(pPDDst);
275# endif
276
277# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
278 /* Dirty page handling. */
279 if (uErr & X86_TRAP_PF_RW) /* write fault? */
280 {
281 /*
282 * If we successfully correct the write protection fault due to dirty bit
283 * tracking, then return immediately.
284 */
285 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
286 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
287 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
288 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
289 {
290 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
291 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
292 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
293 return VINF_SUCCESS;
294 }
295 }
296
297# if 0 /* rarely useful; leave for debugging. */
298 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
299# endif
300# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
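/* Background on the dirty-bit tracking used above: when SyncPageWorker (further
 * down in this file) shadows a guest PTE that is writable but not yet dirty, it
 * deliberately leaves the shadow PTE read-only and tags it with
 * PGM_PTFLAGS_TRACK_DIRTY. The guest's first write to such a page therefore
 * faults into this handler; if CheckDirtyPageFault can resolve the fault purely
 * by dirty-bit bookkeeping it returns VINF_PGM_HANDLED_DIRTY_BIT_FAULT and the
 * fault never reaches the guest. */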
301
302 /*
303 * A common case is the not-present error caused by lazy page table syncing.
304 *
305 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
306 * so we can safely assume that the shadow PT is present when calling SyncPage later.
307 *
308 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
309 * of mapping conflict and defer to SyncCR3 in R3.
310 * (Again, we do NOT support access handlers for non-present guest pages.)
311 *
312 */
313 Assert(PdeSrc.n.u1Present);
314 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
315 && !pPDDst->a[iPDDst].n.u1Present
316 )
317 {
318 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
319 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
320 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
321 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
322 if (RT_SUCCESS(rc))
323 {
324 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
325 return rc;
326 }
327 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
328 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
329 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
330 return VINF_PGM_SYNC_CR3;
331 }
332
333# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
334 /*
335 * Check if this address is within any of our mappings.
336 *
337 * This is *very* fast and it's gonna save us a bit of effort below and prevent
338 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
339 * (BTW, it's impossible to have physical access handlers in a mapping.)
340 */
341 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
342 {
343 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
344 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
345 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
346 {
347 if (pvFault < pMapping->GCPtr)
348 break;
349 if (pvFault - pMapping->GCPtr < pMapping->cb)
350 {
351 /*
352 * The first thing we check is if we've got an undetected conflict.
353 */
354 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
355 {
356 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
357 while (iPT-- > 0)
358 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
359 {
360 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
361 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
362 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
363 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
364 return VINF_PGM_SYNC_CR3;
365 }
366 }
367
368 /*
369 * Check if the fault address is in a virtual page access handler range.
370 */
371 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
372 if ( pCur
373 && pvFault - pCur->Core.Key < pCur->cb
374 && uErr & X86_TRAP_PF_RW)
375 {
376# ifdef IN_RC
377 STAM_PROFILE_START(&pCur->Stat, h);
378 pgmUnlock(pVM);
379 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
380 pgmLock(pVM);
381 STAM_PROFILE_STOP(&pCur->Stat, h);
382# else
383 AssertFailed();
384 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
385# endif
386 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
387 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
388 return rc;
389 }
390
391 /*
392 * Pretend we're not here and let the guest handle the trap.
393 */
394 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
395 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
396 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
397 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
398 return VINF_EM_RAW_GUEST_TRAP;
399 }
400 }
401 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
402 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
403# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
404
405 /*
406 * Check if this fault address is flagged for special treatment,
407 * which means we'll have to figure out the physical address and
408 * check flags associated with it.
409 *
410 * ASSUME that we can limit any special access handling to pages
411 * in page tables which the guest believes to be present.
412 */
413 Assert(PdeSrc.n.u1Present);
414 {
415 RTGCPHYS GCPhys = NIL_RTGCPHYS;
416
417# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
418 if ( PdeSrc.b.u1Size
419# if PGM_GST_TYPE == PGM_TYPE_32BIT
420 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
421# endif
422 )
423 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
424 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
425 else
426 {
427 PGSTPT pPTSrc;
428 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
429 if (RT_SUCCESS(rc))
430 {
431 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
432 if (pPTSrc->a[iPTESrc].n.u1Present)
433 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
434 }
435 }
436# else
437 /* No paging so the fault address is the physical address */
438 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
439# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
440
441 /*
442 * If we have a GC address we'll check if it has any flags set.
443 */
444 if (GCPhys != NIL_RTGCPHYS)
445 {
446 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
447
448 PPGMPAGE pPage;
449 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
450 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
451 {
452 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
453 {
454 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
455 {
456 /*
457 * Physical page access handler.
458 */
459 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
460 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
461 if (pCur)
462 {
463# ifdef PGM_SYNC_N_PAGES
464 /*
465 * If the region is write protected and we got a page not present fault, then sync
466 * the pages. If the fault was caused by a read, then restart the instruction.
467 * In case of write access continue to the GC write handler.
468 *
469 * ASSUMES that there is only one handler per page or that they have similar write properties.
470 */
471 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
472 && !(uErr & X86_TRAP_PF_P))
473 {
474 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
475 if ( RT_FAILURE(rc)
476 || !(uErr & X86_TRAP_PF_RW)
477 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
478 {
479 AssertRC(rc);
480 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
481 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
482 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
483 return rc;
484 }
485 }
486# endif
487
488 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
489 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
490 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
491
492# if defined(IN_RC) || defined(IN_RING0)
493 if (pCur->CTX_SUFF(pfnHandler))
494 {
495 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
496# ifdef IN_RING0
497 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
498# else
499 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
500# endif
501 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
502 void *pvUser = pCur->CTX_SUFF(pvUser);
503
504 STAM_PROFILE_START(&pCur->Stat, h);
505 if (fLeaveLock)
506 pgmUnlock(pVM); /* @todo: Not entirely safe. */
507
508 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
509 if (fLeaveLock)
510 pgmLock(pVM);
511# ifdef VBOX_WITH_STATISTICS
512 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
513 if (pCur)
514 STAM_PROFILE_STOP(&pCur->Stat, h);
515# else
516 pCur = NULL; /* might be invalid by now. */
517# endif
518
519 }
520 else
521# endif
522 rc = VINF_EM_RAW_EMULATE_INSTR;
523
524 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
525 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
526 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
527 return rc;
528 }
529 }
530# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
531 else
532 {
533# ifdef PGM_SYNC_N_PAGES
534 /*
535 * If the region is write protected and we got a page not present fault, then sync
536 * the pages. If the fault was caused by a read, then restart the instruction.
537 * In case of write access continue to the GC write handler.
538 */
539 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
540 && !(uErr & X86_TRAP_PF_P))
541 {
542 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
543 if ( RT_FAILURE(rc)
544 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
545 || !(uErr & X86_TRAP_PF_RW))
546 {
547 AssertRC(rc);
548 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
549 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
550 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
551 return rc;
552 }
553 }
554# endif
555 /*
556 * Ok, it's a virtual page access handler.
557 *
558 * Since it's faster to search by address, we'll do that first
559 * and then retry by GCPhys if that fails.
560 */
561 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
562 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
563 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
564 */
565 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
566 if (pCur)
567 {
568 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
569 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
570 || !(uErr & X86_TRAP_PF_P)
571 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
572 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
573
574 if ( pvFault - pCur->Core.Key < pCur->cb
575 && ( uErr & X86_TRAP_PF_RW
576 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
577 {
578# ifdef IN_RC
579 STAM_PROFILE_START(&pCur->Stat, h);
580 pgmUnlock(pVM);
581 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
582 pgmLock(pVM);
583 STAM_PROFILE_STOP(&pCur->Stat, h);
584# else
585 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
586# endif
587 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
588 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
589 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
590 return rc;
591 }
592 /* Unhandled part of a monitored page */
593 }
594 else
595 {
596 /* Check by physical address. */
597 unsigned iPage;
598 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
599 &pCur, &iPage);
600 Assert(RT_SUCCESS(rc) || !pCur);
601 if ( pCur
602 && ( uErr & X86_TRAP_PF_RW
603 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
604 {
605 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
606# ifdef IN_RC
607 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
608 Assert(off < pCur->cb);
609 STAM_PROFILE_START(&pCur->Stat, h);
610 pgmUnlock(pVM);
611 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
612 pgmLock(pVM);
613 STAM_PROFILE_STOP(&pCur->Stat, h);
614# else
615 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
616# endif
617 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
618 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
619 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
620 return rc;
621 }
622 }
623 }
624# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
625
626 /*
627 * There is a handled area of the page, but this fault doesn't belong to it.
628 * We must emulate the instruction.
629 *
630 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
631 * we first check whether this was a page-not-present fault for a page with only
632 * write access handlers, and restart the instruction if it wasn't a write access.
633 */
634 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
635
636 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
637 && !(uErr & X86_TRAP_PF_P))
638 {
639 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
640 if ( RT_FAILURE(rc)
641 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
642 || !(uErr & X86_TRAP_PF_RW))
643 {
644 AssertRC(rc);
645 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
646 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
647 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
648 return rc;
649 }
650 }
651
652 /** @todo This particular case can cause quite a lot of overhead, e.g. the early stage of kernel booting in Ubuntu 6.06,
653 * which writes to an unhandled part of the LDT page several million times.
654 */
655 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
656 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
657 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
658 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
659 return rc;
660 } /* if any kind of handler */
661
662# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
663 if (uErr & X86_TRAP_PF_P)
664 {
665 /*
666 * The page isn't marked, but it might still be monitored by a virtual page access handler.
667 * (ASSUMES no temporary disabling of virtual handlers.)
668 */
669 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
670 * we should correct both the shadow page table and physical memory flags, and not only check for
671 * accesses within the handler region but for access to pages with virtual handlers. */
672 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
673 if (pCur)
674 {
675 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
676 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
677 || !(uErr & X86_TRAP_PF_P)
678 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
679 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
680
681 if ( pvFault - pCur->Core.Key < pCur->cb
682 && ( uErr & X86_TRAP_PF_RW
683 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
684 {
685# ifdef IN_RC
686 STAM_PROFILE_START(&pCur->Stat, h);
687 pgmUnlock(pVM);
688 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
689 pgmLock(pVM);
690 STAM_PROFILE_STOP(&pCur->Stat, h);
691# else
692 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
693# endif
694 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
695 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
696 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
697 return rc;
698 }
699 }
700 }
701# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
702 }
703 else
704 {
705 /*
706 * When the guest accesses invalid physical memory (e.g. probing
707 * of RAM or accessing a remapped MMIO range), then we'll fall
708 * back to the recompiler to emulate the instruction.
709 */
710 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
711 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
712 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
713 return VINF_EM_RAW_EMULATE_INSTR;
714 }
715
716 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
717
718# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
719 /*
720 * We are here only if page is present in Guest page tables and
721 * trap is not handled by our handlers.
722 *
723 * Check it for page out-of-sync situation.
724 */
725 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
726
727 if (!(uErr & X86_TRAP_PF_P))
728 {
729 /*
730 * Page is not present in our page tables.
731 * Try to sync it!
732 * BTW, fPageShw is invalid in this branch!
733 */
734 if (uErr & X86_TRAP_PF_US)
735 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
736 else /* supervisor */
737 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
738
739 if (PGM_PAGE_IS_BALLOONED(pPage))
740 {
741 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
742 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
743 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
744 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
745 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
746 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
747 return rc;
748 }
749# if defined(LOG_ENABLED) && !defined(IN_RING0)
750 RTGCPHYS GCPhys2;
751 uint64_t fPageGst2;
752 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
753 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
754 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
755# endif /* LOG_ENABLED */
756
757# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
758 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
759 {
760 uint64_t fPageGst;
761 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
762 if ( RT_SUCCESS(rc)
763 && !(fPageGst & X86_PTE_US))
764 {
765 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
766 if ( pvFault == (RTGCPTR)pRegFrame->eip
767 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
768# ifdef CSAM_DETECT_NEW_CODE_PAGES
769 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
770 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
771# endif /* CSAM_DETECT_NEW_CODE_PAGES */
772 )
773 {
774 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
775 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
776 if (rc != VINF_SUCCESS)
777 {
778 /*
779 * CSAM needs to perform a job in ring 3.
780 *
781 * Sync the page before going to the host context; otherwise we'll end up in a loop if
782 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
783 */
784 LogFlow(("CSAM ring 3 job\n"));
785 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
786 AssertRC(rc2);
787
788 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
789 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
790 return rc;
791 }
792 }
793# ifdef CSAM_DETECT_NEW_CODE_PAGES
794 else if ( uErr == X86_TRAP_PF_RW
795 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
796 && pRegFrame->ecx < 0x10000)
797 {
798 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
799 * to detect loading of new code pages.
800 */
801
802 /*
803 * Decode the instruction.
804 */
805 RTGCPTR PC;
806 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
807 if (rc == VINF_SUCCESS)
808 {
809 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
810 uint32_t cbOp;
811 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
812
813 /* For now we'll restrict this to rep movsw/d instructions */
814 if ( rc == VINF_SUCCESS
815 && pDis->pCurInstr->opcode == OP_MOVSWD
816 && (pDis->prefix & PREFIX_REP))
817 {
818 CSAMMarkPossibleCodePage(pVM, pvFault);
819 }
820 }
821 }
822# endif /* CSAM_DETECT_NEW_CODE_PAGES */
823
824 /*
825 * Mark this page as safe.
826 */
827 /** @todo not correct for pages that contain both code and data!! */
828 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
829 CSAMMarkPage(pVM, pvFault, true);
830 }
831 }
832# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
833 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
834 if (RT_SUCCESS(rc))
835 {
836 /* The page was successfully synced, return to the guest. */
837 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
838 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
839 return VINF_SUCCESS;
840 }
841 }
842 else /* uErr & X86_TRAP_PF_P: */
843 {
844 /*
845 * Write protected pages are made writable when the guest makes the first
846 * write to them. This happens for pages that are shared, write monitored
847 * and not yet allocated.
848 *
849 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
850 * to physically monitored regions that are no longer valid.
851 * Assume for now it only applies to the read/write flag.
852 */
853 if ( RT_SUCCESS(rc)
854 && (uErr & X86_TRAP_PF_RW))
855 {
856 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
857 {
858 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
859 Assert(!PGM_PAGE_IS_ZERO(pPage));
860 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
861
862 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
863 if (rc != VINF_SUCCESS)
864 {
865 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
866 return rc;
867 }
868 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
869 return VINF_EM_NO_MEMORY;
870 }
871
872# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
873 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
874 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
875 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
876 {
877 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
878 uint64_t fPageGst;
879 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
880 if ( RT_SUCCESS(rc)
881 && !(fPageGst & X86_PTE_RW))
882 {
883 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
884 if (RT_SUCCESS(rc))
885 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
886 else
887 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
888 return rc;
889 }
890 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
891 }
892# endif
893 /// @todo count the above case; else
894 if (uErr & X86_TRAP_PF_US)
895 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
896 else /* supervisor */
897 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
898
899 /*
900 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
901 * page is not present, which is not true in this case.
902 */
903 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
904 if (RT_SUCCESS(rc))
905 {
906 /*
907 * Page was successfully synced, return to guest.
908 * First invalidate the page as it might be in the TLB.
909 */
910# if PGM_SHW_TYPE == PGM_TYPE_EPT
911 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
912# else
913 PGM_INVL_PG(pVCpu, pvFault);
914# endif
915# ifdef VBOX_STRICT
916 RTGCPHYS GCPhys2;
917 uint64_t fPageGst;
918 if (!HWACCMIsNestedPagingActive(pVM))
919 {
920 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
921 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
922 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
923 }
924 uint64_t fPageShw;
925 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
926 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
927# endif /* VBOX_STRICT */
928 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
929 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
930 return VINF_SUCCESS;
931 }
932 }
933
934# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
935# ifdef VBOX_STRICT
936 /*
937 * Check for VMM page flags vs. Guest page flags consistency.
938 * Currently only for debug purposes.
939 */
940 if (RT_SUCCESS(rc))
941 {
942 /* Get guest page flags. */
943 uint64_t fPageGst;
944 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
945 if (RT_SUCCESS(rc))
946 {
947 uint64_t fPageShw;
948 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
949
950 /*
951 * Compare page flags.
952 * Note: we have AVL, A, D bits desynched.
953 */
954 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
955 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
956 }
957 else
958 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
959 }
960 else
961 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
962# endif /* VBOX_STRICT */
963# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
964 }
965 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
966# endif /* PGM_OUT_OF_SYNC_IN_GC */
967 }
968 else /* GCPhys == NIL_RTGCPHYS */
969 {
970 /*
971 * Page not present in Guest OS or invalid page table address.
972 * This is potential virtual page access handler food.
973 *
974 * For the present we'll say that our access handlers don't
975 * work for this case - we've already discarded the page table
976 * not present case which is identical to this.
977 *
978 * When we perchance find we need this, we will probably have AVL
979 * trees (offset based) to operate on and we can measure their speed
980 * against mapping a page table and probably rearrange this handling
981 * a bit. (Like, searching virtual ranges before checking the
982 * physical address.)
983 */
984 }
985 }
986
987# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
988 /*
989 * Conclusion, this is a guest trap.
990 */
991 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
992 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
993 return VINF_EM_RAW_GUEST_TRAP;
994# else
995 /* present, but not a monitored page; perhaps the guest is probing physical memory */
996 return VINF_EM_RAW_EMULATE_INSTR;
997# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
998
999
1000# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1001
1002 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1003 return VERR_INTERNAL_ERROR;
1004# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1005}
1006#endif /* !IN_RING3 */
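/* Illustrative sketch only, kept out of the build: it restates the architectural
 * 10/10/12 split of a 32-bit (non-PAE) guest linear address that the
 * GST_PD_SHIFT / GST_PT_SHIFT / GST_PT_MASK arithmetic in Trap0eHandler relies
 * on. The helper name is arbitrary and not part of the PGM API. */
#if 0
static void pgmBthIllustrateLinearAddressSplit(uint32_t GCPtr)
{
    const unsigned iPD     = GCPtr >> 22;            /* bits 31-22: page-directory index (1024 entries) */
    const unsigned iPT     = (GCPtr >> 12) & 0x3ff;  /* bits 21-12: page-table index (1024 entries)     */
    const unsigned offPage = GCPtr & 0xfff;          /* bits 11-0:  byte offset into the 4 KB page      */
    NOREF(iPD); NOREF(iPT); NOREF(offPage);
}
#endif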
1007
1008
1009/**
1010 * Emulation of the invlpg instruction.
1011 *
1012 *
1013 * @returns VBox status code.
1014 *
1015 * @param pVCpu The VMCPU handle.
1016 * @param GCPtrPage Page to invalidate.
1017 *
1018 * @remark ASSUMES that the guest is updating before invalidating. This order
1019 * isn't required by the CPU, so this is speculative and could cause
1020 * trouble.
1021 * @remark No TLB shootdown is done on any other VCPU as we assume that
1022 * invlpg emulation is the *only* reason for calling this function.
1023 * (The guest has to shoot down TLB entries on other CPUs itself)
1024 * Currently true, but keep in mind!
1025 *
1026 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1027 */
1028PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1029{
1030#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1031 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1032 && PGM_SHW_TYPE != PGM_TYPE_EPT
1033 int rc;
1034 PVM pVM = pVCpu->CTX_SUFF(pVM);
1035 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1036
1037 Assert(PGMIsLockOwner(pVM));
1038
1039 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1040
1041# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1042 if (pPool->cDirtyPages)
1043 pgmPoolResetDirtyPages(pVM);
1044# endif
1045
1046 /*
1047 * Get the shadow PD entry and skip out if this PD isn't present.
1048 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1049 */
1050# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1051 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1052 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1053
1054 /* Fetch the pgm pool shadow descriptor. */
1055 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1056 Assert(pShwPde);
1057
1058# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1059 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1060 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1061
1062 /* If the shadow PDPE isn't present, then skip the invalidate. */
1063 if (!pPdptDst->a[iPdpt].n.u1Present)
1064 {
1065 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1066 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1067 return VINF_SUCCESS;
1068 }
1069
1070 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1071 PPGMPOOLPAGE pShwPde = NULL;
1072 PX86PDPAE pPDDst;
1073
1074 /* Fetch the pgm pool shadow descriptor. */
1075 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1076 AssertRCSuccessReturn(rc, rc);
1077 Assert(pShwPde);
1078
1079 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1080 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1081
1082# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1083 /* PML4 */
1084 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1085 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1086 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1087 PX86PDPAE pPDDst;
1088 PX86PDPT pPdptDst;
1089 PX86PML4E pPml4eDst;
1090 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1091 if (rc != VINF_SUCCESS)
1092 {
1093 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1094 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1095 return VINF_SUCCESS;
1096 }
1097 Assert(pPDDst);
1098
1099 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1100 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1101
1102 if (!pPdpeDst->n.u1Present)
1103 {
1104 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1105 return VINF_SUCCESS;
1106 }
1107
1108 /* Fetch the pgm pool shadow descriptor. */
1109 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1110 Assert(pShwPde);
1111
1112# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1113
1114 const SHWPDE PdeDst = *pPdeDst;
1115 if (!PdeDst.n.u1Present)
1116 {
1117 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1118 return VINF_SUCCESS;
1119 }
1120
1121# if defined(IN_RC)
1122 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1123 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1124# endif
1125
1126 /*
1127 * Get the guest PD entry and calc big page.
1128 */
1129# if PGM_GST_TYPE == PGM_TYPE_32BIT
1130 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1131 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1132 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1133# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1134 unsigned iPDSrc = 0;
1135# if PGM_GST_TYPE == PGM_TYPE_PAE
1136 X86PDPE PdpeSrc;
1137 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1138# else /* AMD64 */
1139 PX86PML4E pPml4eSrc;
1140 X86PDPE PdpeSrc;
1141 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1142# endif
1143 GSTPDE PdeSrc;
1144
1145 if (pPDSrc)
1146 PdeSrc = pPDSrc->a[iPDSrc];
1147 else
1148 PdeSrc.u = 0;
1149# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1150
1151# if PGM_GST_TYPE == PGM_TYPE_32BIT
1152 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1153# else
1154 const bool fIsBigPage = PdeSrc.b.u1Size;
1155# endif
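/* Note on the asymmetry above: with 32-bit guest paging the PDE's PS bit only
 * means a 4 MB page when CR4.PSE is actually enabled (hence the extra
 * CPUMIsGuestPageSizeExtEnabled check), whereas PAE and long-mode paging always
 * support 2 MB pages, so the size bit can be trusted on its own. */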
1156
1157# ifdef IN_RING3
1158 /*
1159 * If a CR3 Sync is pending we may ignore the invalidate page operation
1160 * depending on the kind of sync and if it's a global page or not.
1161 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1162 */
1163# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1164 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1165 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1166 && fIsBigPage
1167 && PdeSrc.b.u1Global
1168 )
1169 )
1170# else
1171 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1172# endif
1173 {
1174 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1175 return VINF_SUCCESS;
1176 }
1177# endif /* IN_RING3 */
1178
1179 /*
1180 * Deal with the Guest PDE.
1181 */
1182 rc = VINF_SUCCESS;
1183 if (PdeSrc.n.u1Present)
1184 {
1185 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1186 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1187# ifndef PGM_WITHOUT_MAPPING
1188 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1189 {
1190 /*
1191 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1192 */
1193 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1194 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1195 pgmLock(pVM);
1196 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1197 pgmUnlock(pVM);
1198 }
1199 else
1200# endif /* !PGM_WITHOUT_MAPPING */
1201 if (!fIsBigPage)
1202 {
1203 /*
1204 * 4KB - page.
1205 */
1206 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1207 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1208
1209# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1210 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1211 if (pShwPage->cModifications)
1212 pShwPage->cModifications = 1;
1213# endif
1214
1215# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1216 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1217 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1218# endif
1219 if (pShwPage->GCPhys == GCPhys)
1220 {
1221# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1222 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1223 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1224 if (pPT->a[iPTEDst].n.u1Present)
1225 {
1226 /* This is very unlikely with caching/monitoring enabled. */
1227 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1228 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1229 }
1230# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1231 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1232 if (RT_SUCCESS(rc))
1233 rc = VINF_SUCCESS;
1234# endif
1235 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1236 PGM_INVL_PG(pVCpu, GCPtrPage);
1237 }
1238 else
1239 {
1240 /*
1241 * The page table address changed.
1242 */
1243 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1244 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1245 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1246 ASMAtomicWriteSize(pPdeDst, 0);
1247 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1248 PGM_INVL_VCPU_TLBS(pVCpu);
1249 }
1250 }
1251 else
1252 {
1253 /*
1254 * 2/4MB - page.
1255 */
1256 /* Before freeing the page, check if anything really changed. */
1257 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1258 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1259# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1260 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1261 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1262# endif
1263 if ( pShwPage->GCPhys == GCPhys
1264 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1265 {
1266 /* ASSUMES that the given bits are identical for 4M and normal PDEs */
1267 /** @todo PAT */
1268 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1269 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1270 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1271 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1272 {
1273 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1274 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1275# if defined(IN_RC)
1276 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1277 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1278# endif
1279 return VINF_SUCCESS;
1280 }
1281 }
1282
1283 /*
1284 * Ok, the page table is present and it's been changed in the guest.
1285 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1286 * We could do this for some flushes in GC too, but we need an algorithm for
1287 * deciding which 4MB pages contain code that is likely to be executed very soon.
1288 */
1289 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1290 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1291 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1292 ASMAtomicWriteSize(pPdeDst, 0);
1293 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1294 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1295 }
1296 }
1297 else
1298 {
1299 /*
1300 * Page directory is not present, mark shadow PDE not present.
1301 */
1302 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1303 {
1304 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1305 ASMAtomicWriteSize(pPdeDst, 0);
1306 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1307 PGM_INVL_PG(pVCpu, GCPtrPage);
1308 }
1309 else
1310 {
1311 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1312 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1313 }
1314 }
1315# if defined(IN_RC)
1316 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1317 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1318# endif
1319 return rc;
1320
1321#else /* guest real and protected mode */
1322 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1323 return VINF_SUCCESS;
1324#endif
1325}
1326
1327
1328/**
1329 * Update the tracking of shadowed pages.
1330 *
1331 * @param pVCpu The VMCPU handle.
1332 * @param pShwPage The shadow page.
1333 * @param HCPhys The physical page that is being dereferenced.
1334 */
1335DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1336{
1337 PVM pVM = pVCpu->CTX_SUFF(pVM);
1338
1339 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1340 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1341
1342 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1343 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1344 * 2. write protect all shadowed pages. I.e. implement caching.
1345 */
1346 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1347
1348 /*
1349 * Find the guest address.
1350 */
1351 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1352 pRam;
1353 pRam = pRam->CTX_SUFF(pNext))
1354 {
1355 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1356 while (iPage-- > 0)
1357 {
1358 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1359 {
1360 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1361
1362 Assert(pShwPage->cPresent);
1363 Assert(pPool->cPresent);
1364 pShwPage->cPresent--;
1365 pPool->cPresent--;
1366
1367 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1368 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1369 return;
1370 }
1371 }
1372 }
1373
1374 for (;;)
1375 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1376}
1377
1378
1379/**
1380 * Update the tracking of shadowed pages.
1381 *
1382 * @param pVCpu The VMCPU handle.
1383 * @param pShwPage The shadow page.
1384 * @param u16 The top 16 bits of the pPage->HCPhys.
1385 * @param pPage Pointer to the guest page. This will be modified.
1386 * @param iPTDst The index into the shadow table.
1387 */
1388DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1389{
1390 PVM pVM = pVCpu->CTX_SUFF(pVM);
1391 /*
1392 * Just deal with the simple first time here.
1393 */
1394 if (!u16)
1395 {
1396 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1397 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1398 }
1399 else
1400 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1401
1402 /* write back */
1403 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1404 PGM_PAGE_SET_TRACKING(pPage, u16);
1405
1406 /* update statistics. */
1407 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1408 pShwPage->cPresent++;
1409 if (pShwPage->iFirstPresent > iPTDst)
1410 pShwPage->iFirstPresent = iPTDst;
1411}
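/* A note on the tracking word handled above: the common first-reference case is
 * encoded directly in the 16-bit tracking field via PGMPOOL_TD_MAKE(1, pShwPage->idx);
 * any additional reference is routed through pgmPoolTrackPhysExtAddref, which
 * returns the updated tracking value that is then written back with
 * PGM_PAGE_SET_TRACKING. The cPresent counters and iFirstPresent are pool
 * bookkeeping maintained alongside. */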
1412
1413
1414/**
1415 * Creates a 4K shadow page for a guest page.
1416 *
1417 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1418 * physical address. Of the PdeSrc argument only the flags are used. No page structures
1419 * will be mapped in this function.
1420 *
1421 * @param pVCpu The VMCPU handle.
1422 * @param pPteDst Destination page table entry.
1423 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1424 * Can safely assume that only the flags are being used.
1425 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1426 * @param pShwPage Pointer to the shadow page.
1427 * @param iPTDst The index into the shadow table.
1428 *
1429 * @remark Not used for 2/4MB pages!
1430 */
1431DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1432{
1433 if (PteSrc.n.u1Present)
1434 {
1435 PVM pVM = pVCpu->CTX_SUFF(pVM);
1436
1437# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1438 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1439 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1440 if (pShwPage->fDirty)
1441 {
1442 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1443 PX86PTPAE pGstPT;
1444
1445 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1446 pGstPT->a[iPTDst].u = PteSrc.u;
1447 }
1448# endif
1449 /*
1450 * Find the ram range.
1451 */
1452 PPGMPAGE pPage;
1453 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1454 if (RT_SUCCESS(rc))
1455 {
1456 /* Ignore ballooned pages. Don't return errors or use a fatal assert here, as part of a shadow sync range might include ballooned pages. */
1457 if (PGM_PAGE_IS_BALLOONED(pPage))
1458 return;
1459
1460#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1461 /* Try to make the page writable if necessary. */
1462 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1463 && ( PGM_PAGE_IS_ZERO(pPage)
1464 || ( PteSrc.n.u1Write
1465 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1466# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1467 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1468# endif
1469 )
1470 )
1471 )
1472 {
1473 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1474 AssertRC(rc);
1475 }
1476#endif
1477
1478 /** @todo investigate PWT, PCD and PAT. */
1479 /*
1480 * Make page table entry.
1481 */
1482 SHWPTE PteDst;
1483 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1484 {
1485 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1486 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1487 {
1488#if PGM_SHW_TYPE == PGM_TYPE_EPT
1489 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1490 PteDst.n.u1Present = 1;
1491 PteDst.n.u1Execute = 1;
1492 PteDst.n.u1IgnorePAT = 1;
1493 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1494 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
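            /* Note: EPT leaf entries carry their own memory type; with u1IgnorePAT set the
               EMT field above (write-back) is used as-is and the guest PAT is not consulted. */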
1495#else
1496 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1497 | PGM_PAGE_GET_HCPHYS(pPage);
1498#endif
1499 }
1500 else
1501 {
1502 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1503 PteDst.u = 0;
1504 }
1505 /** @todo count these two kinds. */
1506 }
1507 else
1508 {
1509#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1510 /*
1511 * If the page or page directory entry is not marked accessed,
1512 * we mark the page not present.
1513 */
1514 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1515 {
1516 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1517 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1518 PteDst.u = 0;
1519 }
1520 else
1521 /*
1522 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1523 * when the page is modified.
1524 */
1525 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1526 {
1527 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1528 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1529 | PGM_PAGE_GET_HCPHYS(pPage)
1530 | PGM_PTFLAGS_TRACK_DIRTY;
1531 }
1532 else
1533#endif
1534 {
1535 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1536#if PGM_SHW_TYPE == PGM_TYPE_EPT
1537 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1538 PteDst.n.u1Present = 1;
1539 PteDst.n.u1Write = 1;
1540 PteDst.n.u1Execute = 1;
1541 PteDst.n.u1IgnorePAT = 1;
1542 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1543 /* PteDst.n.u1Size = 0 */
1544#else
1545 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1546 | PGM_PAGE_GET_HCPHYS(pPage);
1547#endif
1548 }
1549 }
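            /* Summary of the dirty-bit emulation set up above: the shadow PTE is made
               read-only and tagged with PGM_PTFLAGS_TRACK_DIRTY, the guest's first write then
               faults, CheckPageFault marks the guest PTE accessed/dirty, and CheckDirtyPageFault
               (further down) restores write access, clears the tag and restarts the instruction. */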
1550
1551 /*
1552 * Make sure only allocated pages are mapped writable.
1553 */
1554 if ( PteDst.n.u1Write
1555 && PteDst.n.u1Present
1556 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1557 {
1558 /* Still applies to shared pages. */
1559 Assert(!PGM_PAGE_IS_ZERO(pPage));
1560 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1561 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1562 }
1563
1564 /*
1565 * Keep user track up to date.
1566 */
1567 if (PteDst.n.u1Present)
1568 {
1569 if (!pPteDst->n.u1Present)
1570 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1571 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1572 {
1573 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1574 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1575 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1576 }
1577 }
1578 else if (pPteDst->n.u1Present)
1579 {
1580 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1581 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1582 }
1583
1584 /*
1585 * Update statistics and commit the entry.
1586 */
1587#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1588 if (!PteSrc.n.u1Global)
1589 pShwPage->fSeenNonGlobal = true;
1590#endif
1591 ASMAtomicWriteSize(pPteDst, PteDst.u);
1592 }
1593 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1594 /** @todo count these. */
1595 }
1596 else
1597 {
1598 /*
1599 * Page not-present.
1600 */
1601 Log2(("SyncPageWorker: page not present in Pte\n"));
1602 /* Keep user track up to date. */
1603 if (pPteDst->n.u1Present)
1604 {
1605 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1606 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1607 }
1608 ASMAtomicWriteSize(pPteDst, 0);
1609 /** @todo count these. */
1610 }
1611}
1612
1613
1614/**
1615 * Syncs a guest OS page.
1616 *
1617 * There are no conflicts at this point, nor is there any need for
1618 * page table allocations.
1619 *
1620 * @returns VBox status code.
1621 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1622 * @param pVCpu The VMCPU handle.
1623 * @param PdeSrc Page directory entry of the guest.
1624 * @param GCPtrPage Guest context page address.
1625 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1626 * @param uErr Fault error (X86_TRAP_PF_*).
1627 */
1628PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1629{
1630 PVM pVM = pVCpu->CTX_SUFF(pVM);
1631 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1632 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1633
1634 Assert(PGMIsLockOwner(pVM));
1635
1636#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1637 || PGM_GST_TYPE == PGM_TYPE_PAE \
1638 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1639 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1640 && PGM_SHW_TYPE != PGM_TYPE_EPT
1641
1642 /*
1643 * Assert preconditions.
1644 */
1645 Assert(PdeSrc.n.u1Present);
1646 Assert(cPages);
1647# if 0 /* rarely useful; leave for debugging. */
1648 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1649# endif
1650
1651 /*
1652 * Get the shadow PDE, find the shadow page table in the pool.
1653 */
1654# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1655 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1656 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1657
1658 /* Fetch the pgm pool shadow descriptor. */
1659 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1660 Assert(pShwPde);
1661
1662# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1663 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1664 PPGMPOOLPAGE pShwPde = NULL;
1665 PX86PDPAE pPDDst;
1666
1667 /* Fetch the pgm pool shadow descriptor. */
1668 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1669 AssertRCSuccessReturn(rc2, rc2);
1670 Assert(pShwPde);
1671
1672 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1673 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1674
1675# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1676 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1677 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1678 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1679 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1680
1681 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1682 AssertRCSuccessReturn(rc2, rc2);
1683 Assert(pPDDst && pPdptDst);
1684 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1685# endif
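    /*
     * Illustration only (not part of the template; the helper names below are made up):
     * with plain 32-bit paging the SHW_PD_SHIFT/SHW_PD_MASK and SHW_PT_SHIFT/SHW_PT_MASK
     * macros used in this function reduce to the decomposition sketched here, while PAE
     * and AMD64 modes use 512-entry tables instead (9-bit indexes, page directory shift 21).
     *
     *   static unsigned examplePdIndex32(uint32_t uAddr) { return uAddr >> 22; }           // 1024 PDEs
     *   static unsigned examplePtIndex32(uint32_t uAddr) { return (uAddr >> 12) & 0x3ff; }  // 1024 PTEs
     *   static unsigned examplePageOff32(uint32_t uAddr) { return uAddr & 0xfff; }          // byte within the page
     */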
1686 SHWPDE PdeDst = *pPdeDst;
1687
1688 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1689 if (!PdeDst.n.u1Present)
1690 {
1691 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1692 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1693 return VINF_SUCCESS; /* force the instruction to be executed again. */
1694 }
1695
1696 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1697 Assert(pShwPage);
1698
1699# if PGM_GST_TYPE == PGM_TYPE_AMD64
1700 /* Fetch the pgm pool shadow descriptor. */
1701 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1702 Assert(pShwPde);
1703# endif
1704
1705# if defined(IN_RC)
1706 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1707 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1708# endif
1709
1710 /*
1711 * Check that the page is present and that the shadow PDE isn't out of sync.
1712 */
1713# if PGM_GST_TYPE == PGM_TYPE_32BIT
1714 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1715# else
1716 const bool fBigPage = PdeSrc.b.u1Size;
1717# endif
1718 RTGCPHYS GCPhys;
1719 if (!fBigPage)
1720 {
1721 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1722# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1723 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1724 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1725# endif
1726 }
1727 else
1728 {
1729 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1730# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1731 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
1732 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1733# endif
1734 }
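    /*
     * Worked example for the 32-bit-guest-on-PAE-shadow case above (numbers purely
     * illustrative): a guest 4 MB page with physical base 0x00c00000 is split across two
     * shadow page tables, each covering 2 MB. X86_PD_PAE_SHIFT is 21, so a fault at a
     * virtual address with bit 21 set selects the upper half and yields
     * GCPhys = 0x00c00000 | 0x00200000 = 0x00e00000 as the key checked against
     * pShwPage->GCPhys below.
     */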
1735 if ( pShwPage->GCPhys == GCPhys
1736 && PdeSrc.n.u1Present
1737 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1738 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1739# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1740 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1741# endif
1742 )
1743 {
1744 /*
1745 * Check that the PDE is marked accessed already.
1746 * Since we set the accessed bit *before* getting here on a #PF, this
1747 * check is only meant for dealing with non-#PF'ing paths.
1748 */
1749 if (PdeSrc.n.u1Accessed)
1750 {
1751 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1752 if (!fBigPage)
1753 {
1754 /*
1755 * 4KB Page - Map the guest page table.
1756 */
1757 PGSTPT pPTSrc;
1758 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1759 if (RT_SUCCESS(rc))
1760 {
1761# ifdef PGM_SYNC_N_PAGES
1762 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1763 if ( cPages > 1
1764 && !(uErr & X86_TRAP_PF_P)
1765 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1766 {
1767 /*
1768 * This code path is currently only taken when the caller is PGMTrap0eHandler
1769 * for non-present pages!
1770 *
1771 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1772 * deal with locality.
1773 */
1774 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1775# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1776 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1777 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1778# else
1779 const unsigned offPTSrc = 0;
1780# endif
1781 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1782 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1783 iPTDst = 0;
1784 else
1785 iPTDst -= PGM_SYNC_NR_PAGES / 2;
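            /*
             * Worked example (assuming PGM_SYNC_NR_PAGES is 8 purely for the numbers): a fault
             * on entry 2 of the table gives the window [0..6), while a fault on entry 510 of a
             * 512-entry table gives [506..512); roughly PGM_SYNC_NR_PAGES/2 entries on each
             * side of the faulting entry, clipped to the table bounds.
             */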
1786 for (; iPTDst < iPTDstEnd; iPTDst++)
1787 {
1788 if (!pPTDst->a[iPTDst].n.u1Present)
1789 {
1790 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1791 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1792 NOREF(GCPtrCurPage);
1793#ifndef IN_RING0
1794 /*
1795 * Assuming kernel code will be marked as supervisor - and not as user level
1796 * and executed using a conforming code selector - and marked as read-only.
1797 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1798 */
1799 PPGMPAGE pPage;
1800 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1801 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1802 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1803 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1804 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1805 )
1806#endif /* else: CSAM not active */
1807 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1808 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1809 GCPtrCurPage, PteSrc.n.u1Present,
1810 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1811 PteSrc.n.u1User & PdeSrc.n.u1User,
1812 (uint64_t)PteSrc.u,
1813 (uint64_t)pPTDst->a[iPTDst].u,
1814 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1815 }
1816 }
1817 }
1818 else
1819# endif /* PGM_SYNC_N_PAGES */
1820 {
1821 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1822 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1823 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1824 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1825 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1826 GCPtrPage, PteSrc.n.u1Present,
1827 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1828 PteSrc.n.u1User & PdeSrc.n.u1User,
1829 (uint64_t)PteSrc.u,
1830 (uint64_t)pPTDst->a[iPTDst].u,
1831 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1832 }
1833 }
1834 else /* MMIO or invalid page: emulated in #PF handler. */
1835 {
1836 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1837 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1838 }
1839 }
1840 else
1841 {
1842 /*
1843 * 4/2MB page - lazy syncing shadow 4K pages.
1844 * (There are many causes of getting here; it's no longer only CSAM.)
1845 */
1846 /* Calculate the GC physical address of this 4KB shadow page. */
1847 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1848 /* Find ram range. */
1849 PPGMPAGE pPage;
1850 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1851 if (RT_SUCCESS(rc))
1852 {
1853 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1854
1855# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1856 /* Try to make the page writable if necessary. */
1857 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1858 && ( PGM_PAGE_IS_ZERO(pPage)
1859 || ( PdeSrc.n.u1Write
1860 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1861# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1862 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1863# endif
1864 )
1865 )
1866 )
1867 {
1868 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1869 AssertRC(rc);
1870 }
1871# endif
1872
1873 /*
1874 * Make shadow PTE entry.
1875 */
1876 SHWPTE PteDst;
1877 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1878 | PGM_PAGE_GET_HCPHYS(pPage);
1879 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1880 {
1881 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1882 PteDst.n.u1Write = 0;
1883 else
1884 PteDst.u = 0;
1885 }
1886
1887 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1888 if ( PteDst.n.u1Present
1889 && !pPTDst->a[iPTDst].n.u1Present)
1890 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1891
1892 /* Make sure only allocated pages are mapped writable. */
1893 if ( PteDst.n.u1Write
1894 && PteDst.n.u1Present
1895 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1896 {
1897 /* Still applies to shared pages. */
1898 Assert(!PGM_PAGE_IS_ZERO(pPage));
1899 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1900 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1901 }
1902
1903 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1904
1905 /*
1906 * If the page is not flagged as dirty and is writable, then make it read-only
1907 * at PD level, so we can set the dirty bit when the page is modified.
1908 *
1909 * ASSUMES that page access handlers are implemented on page table entry level.
1910 * Thus we will first catch the dirty access and set PDE.D and restart. If
1911 * there is an access handler, we'll trap again and let it work on the problem.
1912 */
1913 /** @todo r=bird: figure out why we need this here; SyncPT should've taken care of this already.
1914 * As for invlpg, it simply frees the whole shadow PT.
1915 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1916 if ( !PdeSrc.b.u1Dirty
1917 && PdeSrc.b.u1Write)
1918 {
1919 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1920 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1921 PdeDst.n.u1Write = 0;
1922 }
1923 else
1924 {
1925 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1926 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1927 }
1928 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1929 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1930 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1931 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1932 }
1933 else
1934 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1935 }
1936# if defined(IN_RC)
1937 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1938 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1939# endif
1940 return VINF_SUCCESS;
1941 }
1942 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1943 }
1944 else
1945 {
1946 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1947 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1948 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1949 }
1950
1951 /*
1952 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1953 * Yea, I'm lazy.
1954 */
1955 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1956 ASMAtomicWriteSize(pPdeDst, 0);
1957
1958# if defined(IN_RC)
1959 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1960 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1961# endif
1962 PGM_INVL_VCPU_TLBS(pVCpu);
1963 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1964
1965#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1966 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1967 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1968 && !defined(IN_RC)
1969
1970# ifdef PGM_SYNC_N_PAGES
1971 /*
1972 * Get the shadow PDE, find the shadow page table in the pool.
1973 */
1974# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1975 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1976
1977# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1978 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1979
1980# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1981 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1982 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1983 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1984 X86PDEPAE PdeDst;
1985 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1986
1987 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1988 AssertRCSuccessReturn(rc, rc);
1989 Assert(pPDDst && pPdptDst);
1990 PdeDst = pPDDst->a[iPDDst];
1991# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1992 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1993 PEPTPD pPDDst;
1994 EPTPDE PdeDst;
1995
1996 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1997 if (rc != VINF_SUCCESS)
1998 {
1999 AssertRC(rc);
2000 return rc;
2001 }
2002 Assert(pPDDst);
2003 PdeDst = pPDDst->a[iPDDst];
2004# endif
2005 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2006 if (!PdeDst.n.u1Present)
2007 {
2008 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2009 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2010 return VINF_SUCCESS; /* force the instruction to be executed again. */
2011 }
2012
2013 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2014 if (PdeDst.n.u1Size)
2015 {
2016 Assert(HWACCMIsNestedPagingActive(pVM));
2017 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2018 return VINF_SUCCESS;
2019 }
2020
2021 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2022 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2023
2024 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2025 if ( cPages > 1
2026 && !(uErr & X86_TRAP_PF_P)
2027 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2028 {
2029 /*
2030 * This code path is currently only taken when the caller is PGMTrap0eHandler
2031 * for non-present pages!
2032 *
2033 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2034 * deal with locality.
2035 */
2036 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2037 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2038 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2039 iPTDst = 0;
2040 else
2041 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2042 for (; iPTDst < iPTDstEnd; iPTDst++)
2043 {
2044 if (!pPTDst->a[iPTDst].n.u1Present)
2045 {
2046 GSTPTE PteSrc;
2047
2048 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2049
2050 /* Fake the page table entry */
2051 PteSrc.u = GCPtrCurPage;
2052 PteSrc.n.u1Present = 1;
2053 PteSrc.n.u1Dirty = 1;
2054 PteSrc.n.u1Accessed = 1;
2055 PteSrc.n.u1Write = 1;
2056 PteSrc.n.u1User = 1;
2057
2058 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2059
2060 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2061 GCPtrCurPage, PteSrc.n.u1Present,
2062 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2063 PteSrc.n.u1User & PdeSrc.n.u1User,
2064 (uint64_t)PteSrc.u,
2065 (uint64_t)pPTDst->a[iPTDst].u,
2066 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2067
2068 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2069 break;
2070 }
2071 else
2072 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2073 }
2074 }
2075 else
2076# endif /* PGM_SYNC_N_PAGES */
2077 {
2078 GSTPTE PteSrc;
2079 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2080 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2081
2082 /* Fake the page table entry */
2083 PteSrc.u = GCPtrCurPage;
2084 PteSrc.n.u1Present = 1;
2085 PteSrc.n.u1Dirty = 1;
2086 PteSrc.n.u1Accessed = 1;
2087 PteSrc.n.u1Write = 1;
2088 PteSrc.n.u1User = 1;
2089 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2090
2091 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2092 GCPtrPage, PteSrc.n.u1Present,
2093 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2094 PteSrc.n.u1User & PdeSrc.n.u1User,
2095 (uint64_t)PteSrc.u,
2096 (uint64_t)pPTDst->a[iPTDst].u,
2097 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2098 }
2099 return VINF_SUCCESS;
2100
2101#else
2102 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2103 return VERR_INTERNAL_ERROR;
2104#endif
2105}
2106
2107
2108#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2109/**
2110 * Investigate page fault and handle write protection page faults caused by
2111 * dirty bit tracking.
2112 *
2113 * @returns VBox status code.
2114 * @param pVCpu The VMCPU handle.
2115 * @param uErr Page fault error code.
2116 * @param pPdeSrc Guest page directory entry.
2117 * @param GCPtrPage Guest context page address.
2118 */
2119PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2120{
2121 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2122 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2123 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2124# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2125 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2126# endif
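    /*
     * For reference, the architectural #PF error code bits tested below:
     *   bit 0 (X86_TRAP_PF_P)    - 0 = page not present, 1 = protection violation
     *   bit 1 (X86_TRAP_PF_RW)   - set for write accesses
     *   bit 2 (X86_TRAP_PF_US)   - set for user-mode accesses
     *   bit 3 (X86_TRAP_PF_RSVD) - a reserved bit was set in a paging structure
     *   bit 4 (X86_TRAP_PF_ID)   - instruction fetch (only when no-execute is in use)
     */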
2127 unsigned uPageFaultLevel;
2128 int rc;
2129 PVM pVM = pVCpu->CTX_SUFF(pVM);
2130
2131 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2132
2133# if PGM_GST_TYPE == PGM_TYPE_PAE \
2134 || PGM_GST_TYPE == PGM_TYPE_AMD64
2135
2136# if PGM_GST_TYPE == PGM_TYPE_AMD64
2137 PX86PML4E pPml4eSrc;
2138 PX86PDPE pPdpeSrc;
2139
2140 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2141 Assert(pPml4eSrc);
2142
2143 /*
2144 * Real page fault? (PML4E level)
2145 */
2146 if ( (uErr & X86_TRAP_PF_RSVD)
2147 || !pPml4eSrc->n.u1Present
2148 || (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2149 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2150 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2151 )
2152 {
2153 uPageFaultLevel = 0;
2154 goto l_UpperLevelPageFault;
2155 }
2156 Assert(pPdpeSrc);
2157
2158# else /* PAE */
2159 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2160# endif /* PAE */
2161
2162 /*
2163 * Real page fault? (PDPE level)
2164 */
2165 if ( (uErr & X86_TRAP_PF_RSVD)
2166 || !pPdpeSrc->n.u1Present
2167# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2168 || (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2169 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2170 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2171# endif
2172 )
2173 {
2174 uPageFaultLevel = 1;
2175 goto l_UpperLevelPageFault;
2176 }
2177# endif
2178
2179 /*
2180 * Real page fault? (PDE level)
2181 */
2182 if ( (uErr & X86_TRAP_PF_RSVD)
2183 || !pPdeSrc->n.u1Present
2184 || (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2185# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2186 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2187# endif
2188 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2189 {
2190 uPageFaultLevel = 2;
2191 goto l_UpperLevelPageFault;
2192 }
2193
2194 /*
2195 * First check the easy case where the page directory has been marked read-only to track
2196 * the dirty bit of an emulated BIG page
2197 */
2198 if ( pPdeSrc->b.u1Size
2199# if PGM_GST_TYPE == PGM_TYPE_32BIT
2200 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2201# endif
2202 )
2203 {
2204 /* Mark guest page directory as accessed */
2205# if PGM_GST_TYPE == PGM_TYPE_AMD64
2206 pPml4eSrc->n.u1Accessed = 1;
2207 pPdpeSrc->lm.u1Accessed = 1;
2208# endif
2209 pPdeSrc->b.u1Accessed = 1;
2210
2211 /*
2212 * Only write protection page faults are relevant here.
2213 */
2214 if (fWriteFault)
2215 {
2216 /* Mark guest page directory as dirty (BIG page only). */
2217 pPdeSrc->b.u1Dirty = 1;
2218 }
2219 return VINF_SUCCESS;
2220 }
2221 /* else: 4KB page table */
2222
2223 /*
2224 * Map the guest page table.
2225 */
2226 PGSTPT pPTSrc;
2227 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2228 if (RT_SUCCESS(rc))
2229 {
2230 /*
2231 * Real page fault?
2232 */
2233 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2234 const GSTPTE PteSrc = *pPteSrc;
2235 if ( !PteSrc.n.u1Present
2236 || (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2237# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2238 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2239# endif
2240 || (fUserLevelFault && !PteSrc.n.u1User)
2241 )
2242 {
2243 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2244 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2245
2246 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2247 * See the 2nd case above as well.
2248 */
2249 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2250 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2251
2252 return VINF_EM_RAW_GUEST_TRAP;
2253 }
2254 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2255
2256 /*
2257 * Set the accessed bits in the page directory and the page table.
2258 */
2259# if PGM_GST_TYPE == PGM_TYPE_AMD64
2260 pPml4eSrc->n.u1Accessed = 1;
2261 pPdpeSrc->lm.u1Accessed = 1;
2262# endif
2263 pPdeSrc->n.u1Accessed = 1;
2264 pPteSrc->n.u1Accessed = 1;
2265
2266 /*
2267 * Only write protection page faults are relevant here.
2268 */
2269 if (fWriteFault)
2270 {
2271 /* Write access, so mark guest entry as dirty. */
2272# ifdef VBOX_WITH_STATISTICS
2273 if (!pPteSrc->n.u1Dirty)
2274 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2275 else
2276 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2277# endif
2278
2279 pPteSrc->n.u1Dirty = 1;
2280 }
2281 return VINF_SUCCESS;
2282 }
2283 AssertRC(rc);
2284 return rc;
2285
2286
2287l_UpperLevelPageFault:
2288 /*
2289 * Pagefault detected while checking the PML4E, PDPE or PDE.
2290 * Single exit handler to get rid of duplicate code paths.
2291 */
2292 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2293 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2294
2295 if ( 1
2296# if PGM_GST_TYPE == PGM_TYPE_AMD64
2297 && pPml4eSrc->n.u1Present
2298# endif
2299# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2300 && pPdpeSrc->n.u1Present
2301# endif
2302 && pPdeSrc->n.u1Present)
2303 {
2304 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2305 if ( pPdeSrc->b.u1Size
2306# if PGM_GST_TYPE == PGM_TYPE_32BIT
2307 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
2308# endif
2309 )
2310 {
2311 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2312 }
2313 else
2314 {
2315 /*
2316 * Map the guest page table.
2317 */
2318 PGSTPT pPTSrc2;
2319 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc2);
2320 if (RT_SUCCESS(rc))
2321 {
2322 PGSTPTE pPteSrc = &pPTSrc2->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2323 if (pPteSrc->n.u1Present)
2324 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2325 }
2326 AssertRC(rc);
2327 }
2328 }
2329 return VINF_EM_RAW_GUEST_TRAP;
2330}
2331
2332/**
2333 * Handle dirty bit tracking faults.
2334 *
2335 * @returns VBox status code.
2336 * @param pVCpu The VMCPU handle.
2337 * @param uErr Page fault error code.
2338 * @param pPdeSrc Guest page directory entry.
2339 * @param pPdeDst Shadow page directory entry.
2340 * @param GCPtrPage Guest context page address.
2341 */
2342PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2343{
2344# if PGM_GST_TYPE == PGM_TYPE_32BIT
2345 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2346# else
2347 const bool fBigPagesSupported = true;
2348# endif
2349 PVM pVM = pVCpu->CTX_SUFF(pVM);
2350 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2351
2352 Assert(PGMIsLockOwner(pVM));
2353
2354 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2355 {
2356 if ( pPdeDst->n.u1Present
2357 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2358 {
2359 SHWPDE PdeDst = *pPdeDst;
2360
2361 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2362 Assert(pPdeSrc->b.u1Write);
2363
2364 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2365 * fault again and take this path to only invalidate the entry.
2366 */
2367 PdeDst.n.u1Write = 1;
2368 PdeDst.n.u1Accessed = 1;
2369 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2370 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2371 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2372 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2373 }
2374# ifdef IN_RING0
2375 else
2376 /* Check for stale TLB entry; only applies to the SMP guest case. */
2377 if ( pVM->cCpus > 1
2378 && pPdeDst->n.u1Write
2379 && pPdeDst->n.u1Accessed)
2380 {
2381 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2382 if (pShwPage)
2383 {
2384 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2385 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2386 if ( pPteDst->n.u1Present
2387 && pPteDst->n.u1Write)
2388 {
2389 /* Stale TLB entry. */
2390 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2391 PGM_INVL_PG(pVCpu, GCPtrPage);
2392 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2393 }
2394 }
2395 }
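            /* A stale TLB entry here means another VCPU already resolved this dirty-bit fault
               and the shadow PDE/PTE is writable and accessed again; this VCPU merely raced it
               with an out-of-date TLB entry, so flushing and restarting the instruction is
               sufficient. */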
2396# endif /* IN_RING0 */
2397 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2398 }
2399
2400 /*
2401 * Map the guest page table.
2402 */
2403 PGSTPT pPTSrc;
2404 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2405 if (RT_SUCCESS(rc))
2406 {
2407 if (pPdeDst->n.u1Present)
2408 {
2409 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2410 const GSTPTE PteSrc = *pPteSrc;
2411#ifndef IN_RING0
2412 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2413 * Our individual shadow handlers will provide more information and force a fatal exit.
2414 */
2415 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2416 {
2417 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2418 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2419 }
2420#endif
2421 /*
2422 * Map shadow page table.
2423 */
2424 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2425 if (pShwPage)
2426 {
2427 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2428 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2429 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2430 {
2431 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2432 {
2433 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2434 SHWPTE PteDst = *pPteDst;
2435
2436 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2437 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2438
2439 Assert(pPteSrc->n.u1Write);
2440
2441 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2442 * fault again and take this path to only invalidate the entry.
2443 */
2444 if (RT_LIKELY(pPage))
2445 {
2446 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2447 {
2448 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2449 PteDst.n.u1Write = 0;
2450 }
2451 else
2452 {
2453 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2454 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2455 {
2456 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2457 AssertRC(rc);
2458 }
2459 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2460 {
2461 PteDst.n.u1Write = 1;
2462 }
2463 else
2464 {
2465 /* Still applies to shared pages. */
2466 Assert(!PGM_PAGE_IS_ZERO(pPage));
2467 PteDst.n.u1Write = 0;
2468 }
2469 }
2470 }
2471 else
2472 PteDst.n.u1Write = 1;
2473
2474 PteDst.n.u1Dirty = 1;
2475 PteDst.n.u1Accessed = 1;
2476 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2477 ASMAtomicWriteSize(pPteDst, PteDst.u);
2478 PGM_INVL_PG(pVCpu, GCPtrPage);
2479 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2480 }
2481# ifdef IN_RING0
2482 else
2483 /* Check for stale TLB entry; only applies to the SMP guest case. */
2484 if ( pVM->cCpus > 1
2485 && pPteDst->n.u1Write == 1
2486 && pPteDst->n.u1Accessed == 1)
2487 {
2488 /* Stale TLB entry. */
2489 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2490 PGM_INVL_PG(pVCpu, GCPtrPage);
2491 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2492 }
2493# endif
2494 }
2495 }
2496 else
2497 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2498 }
2499 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2500 }
2501 AssertRC(rc);
2502 return rc;
2503}
2504#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2505
2506
2507/**
2508 * Sync a shadow page table.
2509 *
2510 * The shadow page table is not present. This includes the case where
2511 * there is a conflict with a mapping.
2512 *
2513 * @returns VBox status code.
2514 * @param pVCpu The VMCPU handle.
2515 * @param iPDSrc Page directory index.
2516 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2517 * Assume this is a temporary mapping.
2518 * @param GCPtrPage GC Pointer of the page that caused the fault.
2519 */
2520PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2521{
2522 PVM pVM = pVCpu->CTX_SUFF(pVM);
2523 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2524
2525 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2526#if 0 /* rarely useful; leave for debugging. */
2527 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2528#endif
2529 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2530
2531 Assert(PGMIsLocked(pVM));
2532
2533#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2534 || PGM_GST_TYPE == PGM_TYPE_PAE \
2535 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2536 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2537 && PGM_SHW_TYPE != PGM_TYPE_EPT
2538
2539 int rc = VINF_SUCCESS;
2540
2541 /*
2542 * Validate input a little bit.
2543 */
2544 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2545# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2546 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2547 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2548
2549 /* Fetch the pgm pool shadow descriptor. */
2550 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2551 Assert(pShwPde);
2552
2553# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2554 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2555 PPGMPOOLPAGE pShwPde = NULL;
2556 PX86PDPAE pPDDst;
2557 PSHWPDE pPdeDst;
2558
2559 /* Fetch the pgm pool shadow descriptor. */
2560 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2561 AssertRCSuccessReturn(rc, rc);
2562 Assert(pShwPde);
2563
2564 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2565 pPdeDst = &pPDDst->a[iPDDst];
2566
2567# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2568 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2569 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2570 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2571 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2572 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2573 AssertRCSuccessReturn(rc, rc);
2574 Assert(pPDDst);
2575 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2576# endif
2577 SHWPDE PdeDst = *pPdeDst;
2578
2579# if PGM_GST_TYPE == PGM_TYPE_AMD64
2580 /* Fetch the pgm pool shadow descriptor. */
2581 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2582 Assert(pShwPde);
2583# endif
2584
2585# ifndef PGM_WITHOUT_MAPPINGS
2586 /*
2587 * Check for conflicts.
2588 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2589 * HC: Simply resolve the conflict.
2590 */
2591 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2592 {
2593 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2594# ifndef IN_RING3
2595 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2596 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2597 return VERR_ADDRESS_CONFLICT;
2598# else
2599 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2600 Assert(pMapping);
2601# if PGM_GST_TYPE == PGM_TYPE_32BIT
2602 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2603# elif PGM_GST_TYPE == PGM_TYPE_PAE
2604 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2605# else
2606 AssertFailed(); /* can't happen for amd64 */
2607# endif
2608 if (RT_FAILURE(rc))
2609 {
2610 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2611 return rc;
2612 }
2613 PdeDst = *pPdeDst;
2614# endif
2615 }
2616# endif /* !PGM_WITHOUT_MAPPINGS */
2617 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2618
2619# if defined(IN_RC)
2620 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2621 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2622# endif
2623
2624 /*
2625 * Sync page directory entry.
2626 */
2627 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2628 if (PdeSrc.n.u1Present)
2629 {
2630 /*
2631 * Allocate & map the page table.
2632 */
2633 PSHWPT pPTDst;
2634# if PGM_GST_TYPE == PGM_TYPE_32BIT
2635 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2636# else
2637 const bool fPageTable = !PdeSrc.b.u1Size;
2638# endif
2639 PPGMPOOLPAGE pShwPage;
2640 RTGCPHYS GCPhys;
2641 if (fPageTable)
2642 {
2643 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2644# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2645 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2646 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2647# endif
2648 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2649 }
2650 else
2651 {
2652 PGMPOOLACCESS enmAccess;
2653# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2654 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2655# else
2656 const bool fNoExecute = false;
2657# endif
2658
2659 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2660# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2661 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs. */
2662 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2663# endif
2664 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2665 if (PdeSrc.n.u1User)
2666 {
2667 if (PdeSrc.n.u1Write)
2668 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2669 else
2670 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2671 }
2672 else
2673 {
2674 if (PdeSrc.n.u1Write)
2675 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2676 else
2677 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2678 }
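            /* Rationale (see also the comment above): the pool may hand back a cached shadow PT
               for this GCPhys, so the access kind is made part of the allocation request; a PT
               filled for, say, a supervisor read-only 4 MB mapping must not be reused for a
               user writable mapping of the same physical range. */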
2679 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2680 }
2681 if (rc == VINF_SUCCESS)
2682 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2683 else if (rc == VINF_PGM_CACHED_PAGE)
2684 {
2685 /*
2686 * The PT was cached, just hook it up.
2687 */
2688 if (fPageTable)
2689 PdeDst.u = pShwPage->Core.Key
2690 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2691 else
2692 {
2693 PdeDst.u = pShwPage->Core.Key
2694 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2695 /* (see explanation and assumptions further down.) */
2696 if ( !PdeSrc.b.u1Dirty
2697 && PdeSrc.b.u1Write)
2698 {
2699 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2700 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2701 PdeDst.b.u1Write = 0;
2702 }
2703 }
2704 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2705# if defined(IN_RC)
2706 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2707# endif
2708 return VINF_SUCCESS;
2709 }
2710 else if (rc == VERR_PGM_POOL_FLUSHED)
2711 {
2712 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2713# if defined(IN_RC)
2714 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2715# endif
2716 return VINF_PGM_SYNC_CR3;
2717 }
2718 else
2719 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2720 PdeDst.u &= X86_PDE_AVL_MASK;
2721 PdeDst.u |= pShwPage->Core.Key;
2722
2723 /*
2724 * Page directory has been accessed (this is a fault situation, remember).
2725 */
2726 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2727 if (fPageTable)
2728 {
2729 /*
2730 * Page table - 4KB.
2731 *
2732 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2733 */
2734 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2735 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2736 PGSTPT pPTSrc;
2737 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2738 if (RT_SUCCESS(rc))
2739 {
2740 /*
2741 * Start by syncing the page directory entry so CSAM's TLB trick works.
2742 */
2743 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2744 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2745 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2746# if defined(IN_RC)
2747 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2748# endif
2749
2750 /*
2751 * Directory/page user or supervisor privilege: (same goes for read/write)
2752 *
2753 * Directory Page Combined
2754 * U/S U/S U/S
2755 * 0 0 0
2756 * 0 1 0
2757 * 1 0 0
2758 * 1 1 1
2759 *
2760 * Simple AND operation. Table listed for completeness.
2761 *
2762 */
2763 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2764# ifdef PGM_SYNC_N_PAGES
2765 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2766 unsigned iPTDst = iPTBase;
2767 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2768 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2769 iPTDst = 0;
2770 else
2771 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2772# else /* !PGM_SYNC_N_PAGES */
2773 unsigned iPTDst = 0;
2774 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2775# endif /* !PGM_SYNC_N_PAGES */
2776# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2777 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2778 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2779# else
2780 const unsigned offPTSrc = 0;
2781# endif
2782 for (; iPTDst < iPTDstEnd; iPTDst++)
2783 {
2784 const unsigned iPTSrc = iPTDst + offPTSrc;
2785 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2786
2787 if (PteSrc.n.u1Present) /* we've already cleared it above */
2788 {
2789# ifndef IN_RING0
2790 /*
2791 * Assuming kernel code will be marked as supervisor - and not as user level
2792 * and executed using a conforming code selector - and marked as read-only.
2793 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2794 */
2795 PPGMPAGE pPage;
2796 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2797 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2798 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2799 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2800 )
2801# endif
2802 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2803 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2804 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2805 PteSrc.n.u1Present,
2806 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2807 PteSrc.n.u1User & PdeSrc.n.u1User,
2808 (uint64_t)PteSrc.u,
2809 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2810 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2811 }
2812 } /* for PTEs */
2813 }
2814 }
2815 else
2816 {
2817 /*
2818 * Big page - 2/4MB.
2819 *
2820 * We'll walk the ram range list in parallel and optimize lookups.
2821 * We will only sync one shadow page table at a time.
2822 */
2823 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2824
2825 /**
2826 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2827 */
2828
2829 /*
2830 * Start by syncing the page directory entry.
2831 */
2832 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2833 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2834
2835 /*
2836 * If the page is not flagged as dirty and is writable, then make it read-only
2837 * at PD level, so we can set the dirty bit when the page is modified.
2838 *
2839 * ASSUMES that page access handlers are implemented on page table entry level.
2840 * Thus we will first catch the dirty access and set PDE.D and restart. If
2841 * there is an access handler, we'll trap again and let it work on the problem.
2842 */
2843 /** @todo move the above stuff to a section in the PGM documentation. */
2844 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2845 if ( !PdeSrc.b.u1Dirty
2846 && PdeSrc.b.u1Write)
2847 {
2848 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2849 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2850 PdeDst.b.u1Write = 0;
2851 }
2852 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2853# if defined(IN_RC)
2854 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2855# endif
2856
2857 /*
2858 * Fill the shadow page table.
2859 */
2860 /* Get address and flags from the source PDE. */
2861 SHWPTE PteDstBase;
2862 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2863
2864 /* Loop thru the entries in the shadow PT. */
2865 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2866 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2867 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2868 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
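                /* The loop below walks the shadow PT and the ram range list in lock step,
                   handling three cases: GCPhys falls inside a ram range (build a real shadow
                   PTE), GCPhys lies in a gap before the next range (zero the entries up to
                   that range), or we have run past the last range (zero the remainder of the
                   table). */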
2869 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2870 unsigned iPTDst = 0;
2871 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2872 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2873 {
2874 /* Advance ram range list. */
2875 while (pRam && GCPhys > pRam->GCPhysLast)
2876 pRam = pRam->CTX_SUFF(pNext);
2877 if (pRam && GCPhys >= pRam->GCPhys)
2878 {
2879 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2880 do
2881 {
2882 /* Make shadow PTE. */
2883 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2884 SHWPTE PteDst;
2885
2886# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2887 /* Try to make the page writable if necessary. */
2888 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2889 && ( PGM_PAGE_IS_ZERO(pPage)
2890 || ( PteDstBase.n.u1Write
2891 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2892# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2893 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2894# endif
2895 && !PGM_PAGE_IS_BALLOONED(pPage))
2896 )
2897 )
2898 {
2899 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2900 AssertRCReturn(rc, rc);
2901 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2902 break;
2903 }
2904# endif
2905
2906 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2907 {
2908 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2909 {
2910 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2911 PteDst.n.u1Write = 0;
2912 }
2913 else
2914 PteDst.u = 0;
2915 }
2916 else
2917 if (PGM_PAGE_IS_BALLOONED(pPage))
2918 {
2919 /* Skip ballooned pages. */
2920 PteDst.u = 0;
2921 }
2922# ifndef IN_RING0
2923 /*
2924 * Assuming kernel code will be marked as supervisor and not as user level and executed
2925 * using a conforming code selector. Don't check for read-only, as that would assume the whole
2926 * 4MB is code or read-only data. Linux enables write access for its large pages.
2927 */
2928 else if ( !PdeSrc.n.u1User
2929 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
2930 PteDst.u = 0;
2931# endif
2932 else
2933 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2934
2935 /* Only map writable pages writable. */
2936 if ( PteDst.n.u1Write
2937 && PteDst.n.u1Present
2938 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2939 {
2940 /* Still applies to shared pages. */
2941 Assert(!PGM_PAGE_IS_ZERO(pPage));
2942 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2943 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2944 }
2945
2946 if (PteDst.n.u1Present)
2947 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2948
2949 /* commit it */
2950 pPTDst->a[iPTDst] = PteDst;
2951 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2952 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2953 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2954
2955 /* advance */
2956 GCPhys += PAGE_SIZE;
2957 iHCPage++;
2958 iPTDst++;
2959 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2960 && GCPhys <= pRam->GCPhysLast);
2961 }
2962 else if (pRam)
2963 {
2964 Log(("Invalid pages at %RGp\n", GCPhys));
2965 do
2966 {
2967 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2968 GCPhys += PAGE_SIZE;
2969 iPTDst++;
2970 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2971 && GCPhys < pRam->GCPhys);
2972 }
2973 else
2974 {
2975 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2976 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2977 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2978 }
2979 } /* while more PTEs */
2980 } /* 4KB / 4MB */
2981 }
2982 else
2983 AssertRelease(!PdeDst.n.u1Present);
2984
2985 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2986 if (RT_FAILURE(rc))
2987 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2988 return rc;
2989
2990#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2991 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2992 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2993 && !defined(IN_RC)
2994
2995 /*
2996 * Validate input a little bit.
2997 */
2998 int rc = VINF_SUCCESS;
2999# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3000 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3001 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3002
3003 /* Fetch the pgm pool shadow descriptor. */
3004 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3005 Assert(pShwPde);
3006
3007# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3008 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3009 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3010 PX86PDPAE pPDDst;
3011 PSHWPDE pPdeDst;
3012
3013 /* Fetch the pgm pool shadow descriptor. */
3014 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
3015 AssertRCSuccessReturn(rc, rc);
3016 Assert(pShwPde);
3017
3018 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
3019 pPdeDst = &pPDDst->a[iPDDst];
3020
3021# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3022 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3023 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3024 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3025 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3026 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3027 AssertRCSuccessReturn(rc, rc);
3028 Assert(pPDDst);
3029 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3030
3031 /* Fetch the pgm pool shadow descriptor. */
3032 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3033 Assert(pShwPde);
3034
3035# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3036 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3037 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3038 PEPTPD pPDDst;
3039 PEPTPDPT pPdptDst;
3040
3041 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3042 if (rc != VINF_SUCCESS)
3043 {
3044 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3045 AssertRC(rc);
3046 return rc;
3047 }
3048 Assert(pPDDst);
3049 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3050
3051 /* Fetch the pgm pool shadow descriptor. */
3052 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3053 Assert(pShwPde);
3054# endif
3055 SHWPDE PdeDst = *pPdeDst;
3056
3057 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3058 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3059
3060# if defined(PGM_WITH_LARGE_PAGES) && (PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE)
3061# if (PGM_SHW_TYPE != PGM_TYPE_EPT) /* PGM_TYPE_EPT implies nested paging */
3062 if (HWACCMIsNestedPagingActive(pVM))
3063# endif
3064 {
3065 PPGMPAGE pPage;
3066
3067 /* Check if we allocated a big page before for this 2 MB range. */
3068 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3069 if (RT_SUCCESS(rc))
3070 {
3071 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3072
3073 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3074 {
3075 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3076 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3077 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3078 }
3079 else
3080 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3081 {
3082 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3083 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3084 if (RT_SUCCESS(rc))
3085 {
3086 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3087 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3088 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3089 }
3090 }
3091 else
3092 if (PGMIsUsingLargePages(pVM))
3093 {
3094 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3095 if (RT_SUCCESS(rc))
3096 {
3097 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3098 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3099 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3100 }
3101 else
3102 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3103 }
3104
3105 if (HCPhys != NIL_RTHCPHYS)
3106 {
3107 PdeDst.u &= X86_PDE_AVL_MASK;
3108 PdeDst.u |= HCPhys;
3109 PdeDst.n.u1Present = 1;
3110 PdeDst.n.u1Write = 1;
3111 PdeDst.b.u1Size = 1;
3112# if PGM_SHW_TYPE == PGM_TYPE_EPT
3113 PdeDst.n.u1Execute = 1;
3114 PdeDst.b.u1IgnorePAT = 1;
3115 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3116# else
3117 PdeDst.n.u1User = 1;
3118# endif
3119 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3120
3121 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3122 /* Add a reference to the first page only. */
3123 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3124
3125 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3126 return VINF_SUCCESS;
3127 }
3128 }
3129 }
3130# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3131
3132 GSTPDE PdeSrc;
3133 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3134 PdeSrc.n.u1Present = 1;
3135 PdeSrc.n.u1Write = 1;
3136 PdeSrc.n.u1Accessed = 1;
3137 PdeSrc.n.u1User = 1;
3138
3139 /*
3140 * Allocate & map the page table.
3141 */
3142 PSHWPT pPTDst;
3143 PPGMPOOLPAGE pShwPage;
3144 RTGCPHYS GCPhys;
3145
3146 /* Virtual address = physical address */
3147 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3148 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3149
3150 if ( rc == VINF_SUCCESS
3151 || rc == VINF_PGM_CACHED_PAGE)
3152 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3153 else
3154 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3155
3156 PdeDst.u &= X86_PDE_AVL_MASK;
3157 PdeDst.u |= pShwPage->Core.Key;
3158 PdeDst.n.u1Present = 1;
3159 PdeDst.n.u1Write = 1;
3160# if PGM_SHW_TYPE == PGM_TYPE_EPT
3161 PdeDst.n.u1Execute = 1;
3162# else
3163 PdeDst.n.u1User = 1;
3164 PdeDst.n.u1Accessed = 1;
3165# endif
3166 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3167
3168 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3169 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3170 return rc;
3171
3172#else
3173 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3174 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3175 return VERR_INTERNAL_ERROR;
3176#endif
3177}
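/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): the large-page reuse decision made in SyncPT above boils down
 * to the two PGMPAGE properties checked below. The helper name is hypothetical.
 */
#if 0
DECLINLINE(bool) pgmBthSketchCanReuseLargePage(PPGMPAGE pPage)
{
    /* A 2 MB shadow PDE is only (re)used when the backing page was allocated as a
       PDE-type page and the range is in the ALLOCATED state; anything else falls
       back to a 4 KB shadow page table. */
    return PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE
        && PGM_PAGE_GET_STATE(pPage)    == PGM_PAGE_STATE_ALLOCATED;
}
#endif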
3178
3179
3180
3181/**
3182 * Prefetch a page/set of pages.
3183 *
3184 * Typically used to sync commonly used pages before entering raw mode
3185 * after a CR3 reload.
3186 *
3187 * @returns VBox status code.
3188 * @param pVCpu The VMCPU handle.
3189 * @param GCPtrPage Page to prefetch.
3190 */
3191PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3192{
3193#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3194 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3195 /*
3196 * Check that all Guest levels thru the PDE are present, getting the
2997 * PD and PDE in the process.
3198 */
3199 int rc = VINF_SUCCESS;
3200# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3201# if PGM_GST_TYPE == PGM_TYPE_32BIT
3202 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3203 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3204# elif PGM_GST_TYPE == PGM_TYPE_PAE
3205 unsigned iPDSrc;
3206 X86PDPE PdpeSrc;
3207 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3208 if (!pPDSrc)
3209 return VINF_SUCCESS; /* not present */
3210# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3211 unsigned iPDSrc;
3212 PX86PML4E pPml4eSrc;
3213 X86PDPE PdpeSrc;
3214 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3215 if (!pPDSrc)
3216 return VINF_SUCCESS; /* not present */
3217# endif
3218 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3219# else
3220 PGSTPD pPDSrc = NULL;
3221 const unsigned iPDSrc = 0;
3222 GSTPDE PdeSrc;
3223
3224 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3225 PdeSrc.n.u1Present = 1;
3226 PdeSrc.n.u1Write = 1;
3227 PdeSrc.n.u1Accessed = 1;
3228 PdeSrc.n.u1User = 1;
3229# endif
3230
3231 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3232 {
3233 PVM pVM = pVCpu->CTX_SUFF(pVM);
3234 pgmLock(pVM);
3235
3236# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3237 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3238# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3239 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3240 PX86PDPAE pPDDst;
3241 X86PDEPAE PdeDst;
3242# if PGM_GST_TYPE != PGM_TYPE_PAE
3243 X86PDPE PdpeSrc;
3244
3245 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3246 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3247# endif
3248 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3249 if (rc != VINF_SUCCESS)
3250 {
3251 pgmUnlock(pVM);
3252 AssertRC(rc);
3253 return rc;
3254 }
3255 Assert(pPDDst);
3256 PdeDst = pPDDst->a[iPDDst];
3257
3258# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3259 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3260 PX86PDPAE pPDDst;
3261 X86PDEPAE PdeDst;
3262
3263# if PGM_GST_TYPE == PGM_TYPE_PROT
3264 /* AMD-V nested paging */
3265 X86PML4E Pml4eSrc;
3266 X86PDPE PdpeSrc;
3267 PX86PML4E pPml4eSrc = &Pml4eSrc;
3268
3269 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3270 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3271 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3272# endif
3273
3274 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3275 if (rc != VINF_SUCCESS)
3276 {
3277 pgmUnlock(pVM);
3278 AssertRC(rc);
3279 return rc;
3280 }
3281 Assert(pPDDst);
3282 PdeDst = pPDDst->a[iPDDst];
3283# endif
3284 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3285 {
3286 if (!PdeDst.n.u1Present)
3287 {
3288 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3289 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3290 }
3291 else
3292 {
3293 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3294 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3295 * makes no sense to prefetch more than one page.
3296 */
3297 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3298 if (RT_SUCCESS(rc))
3299 rc = VINF_SUCCESS;
3300 }
3301 }
3302 pgmUnlock(pVM);
3303 }
3304 return rc;
3305
3306#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3307 return VINF_SUCCESS; /* ignore */
3308#endif
3309}
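/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): callers reach the PrefetchPage template instance above through
 * the public PGMPrefetchPage() wrapper; the helper below is hypothetical and only
 * shows the assumed calling pattern.
 */
#if 0
static void pgmBthSketchPrefetch(PVMCPU pVCpu, RTGCPTR GCPtrPage)
{
    int rc = PGMPrefetchPage(pVCpu, GCPtrPage); /* dispatches to the active paging mode */
    AssertMsg(RT_SUCCESS(rc) || rc == VINF_PGM_SYNC_CR3, ("rc=%Rrc\n", rc));
}
#endif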
3310
3311
3312
3313
3314/**
3315 * Syncs a page during a PGMVerifyAccess() call.
3316 *
3317 * @returns VBox status code (informational included).
3318 * @param pVCpu The VMCPU handle.
3319 * @param GCPtrPage The address of the page to sync.
3320 * @param fPage The effective guest page flags.
3321 * @param uErr The trap error code.
3322 */
3323PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3324{
3325 PVM pVM = pVCpu->CTX_SUFF(pVM);
3326
3327 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3328
3329 Assert(!HWACCMIsNestedPagingActive(pVM));
3330#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3331 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3332
3333# ifndef IN_RING0
3334 if (!(fPage & X86_PTE_US))
3335 {
3336 /*
3337 * Mark this page as safe.
3338 */
3339 /** @todo not correct for pages that contain both code and data!! */
3340 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3341 CSAMMarkPage(pVM, GCPtrPage, true);
3342 }
3343# endif
3344
3345 /*
3346 * Get guest PD and index.
3347 */
3348# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3349# if PGM_GST_TYPE == PGM_TYPE_32BIT
3350 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3351 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3352# elif PGM_GST_TYPE == PGM_TYPE_PAE
3353 unsigned iPDSrc = 0;
3354 X86PDPE PdpeSrc;
3355 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3356
3357 if (!pPDSrc)
3358 {
3359 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3360 return VINF_EM_RAW_GUEST_TRAP;
3361 }
3362# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3363 unsigned iPDSrc;
3364 PX86PML4E pPml4eSrc;
3365 X86PDPE PdpeSrc;
3366 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3367 if (!pPDSrc)
3368 {
3369 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3370 return VINF_EM_RAW_GUEST_TRAP;
3371 }
3372# endif
3373# else
3374 PGSTPD pPDSrc = NULL;
3375 const unsigned iPDSrc = 0;
3376# endif
3377 int rc = VINF_SUCCESS;
3378
3379 pgmLock(pVM);
3380
3381 /*
3382 * First check if the shadow pd is present.
3383 */
3384# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3385 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3386# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3387 PX86PDEPAE pPdeDst;
3388 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3389 PX86PDPAE pPDDst;
3390# if PGM_GST_TYPE != PGM_TYPE_PAE
3391 X86PDPE PdpeSrc;
3392
3393 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3394 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3395# endif
3396 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3397 if (rc != VINF_SUCCESS)
3398 {
3399 pgmUnlock(pVM);
3400 AssertRC(rc);
3401 return rc;
3402 }
3403 Assert(pPDDst);
3404 pPdeDst = &pPDDst->a[iPDDst];
3405
3406# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3407 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3408 PX86PDPAE pPDDst;
3409 PX86PDEPAE pPdeDst;
3410
3411# if PGM_GST_TYPE == PGM_TYPE_PROT
3412 /* AMD-V nested paging */
3413 X86PML4E Pml4eSrc;
3414 X86PDPE PdpeSrc;
3415 PX86PML4E pPml4eSrc = &Pml4eSrc;
3416
3417 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3418 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3419 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3420# endif
3421
3422 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3423 if (rc != VINF_SUCCESS)
3424 {
3425 pgmUnlock(pVM);
3426 AssertRC(rc);
3427 return rc;
3428 }
3429 Assert(pPDDst);
3430 pPdeDst = &pPDDst->a[iPDDst];
3431# endif
3432
3433# if defined(IN_RC)
3434 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3435 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3436# endif
3437
3438 if (!pPdeDst->n.u1Present)
3439 {
3440 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3441 if (rc != VINF_SUCCESS)
3442 {
3443# if defined(IN_RC)
3444 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3445 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3446# endif
3447 pgmUnlock(pVM);
3448 AssertRC(rc);
3449 return rc;
3450 }
3451 }
3452
3453# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3454 /* Check for dirty bit fault */
3455 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3456 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3457 Log(("PGMVerifyAccess: success (dirty)\n"));
3458 else
3459 {
3460 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3461# else
3462 {
3463 GSTPDE PdeSrc;
3464 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3465 PdeSrc.n.u1Present = 1;
3466 PdeSrc.n.u1Write = 1;
3467 PdeSrc.n.u1Accessed = 1;
3468 PdeSrc.n.u1User = 1;
3469
3470# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3471 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3472 if (uErr & X86_TRAP_PF_US)
3473 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3474 else /* supervisor */
3475 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3476
3477 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3478 if (RT_SUCCESS(rc))
3479 {
3480 /* Page was successfully synced */
3481 Log2(("PGMVerifyAccess: success (sync)\n"));
3482 rc = VINF_SUCCESS;
3483 }
3484 else
3485 {
3486 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3487 rc = VINF_EM_RAW_GUEST_TRAP;
3488 }
3489 }
3490# if defined(IN_RC)
3491 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3492 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3493# endif
3494 pgmUnlock(pVM);
3495 return rc;
3496
3497#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3498
3499 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3500 return VERR_INTERNAL_ERROR;
3501#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3502}
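/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): VerifyAccessSyncPage above is normally reached through the
 * PGMVerifyAccess() wrapper; both the wrapper's flag convention and the helper
 * below are assumptions for illustration only.
 */
#if 0
static bool pgmBthSketchIsUserReadable(PVMCPU pVCpu, RTGCUINTPTR GCPtr, uint32_t cb)
{
    int rc = PGMVerifyAccess(pVCpu, GCPtr, cb, X86_PTE_US /* assumed user-read check */);
    return RT_SUCCESS(rc);
}
#endif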
3503
3504
3505/**
3506 * Syncs the paging hierarchy starting at CR3.
3507 *
3508 * @returns VBox status code, no specials.
3509 * @param pVCpu The VMCPU handle.
3510 * @param cr0 Guest context CR0 register
3511 * @param cr3 Guest context CR3 register
3512 * @param cr4 Guest context CR4 register
3513 * @param fGlobal Including global page directories or not
3514 */
3515PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3516{
3517 PVM pVM = pVCpu->CTX_SUFF(pVM);
3518
3519 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3520
3521#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3522
3523 pgmLock(pVM);
3524
3525# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3526 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3527 if (pPool->cDirtyPages)
3528 pgmPoolResetDirtyPages(pVM);
3529# endif
3530
3531 /*
3532 * Update page access handlers.
3533 * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3534 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3535 * have to look into that later because it will have a bad influence on performance.
3536 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3537 * bird: Yes, but that won't work for aliases.
3538 */
3539 /** @todo this MUST go away. See #1557. */
3540 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3541 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3542 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3543 pgmUnlock(pVM);
3544#endif /* !NESTED && !EPT */
3545
3546#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3547 /*
3548 * Nested / EPT - almost no work.
3549 */
3550 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3551 return VINF_SUCCESS;
3552
3553#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3554 /*
3555 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3556 * out the shadow parts when the guest modifies its tables.
3557 */
3558 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3559 return VINF_SUCCESS;
3560
3561#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3562
3563# ifndef PGM_WITHOUT_MAPPINGS
3564 /*
3565 * Check for and resolve conflicts with our guest mappings if they
3566 * are enabled and not fixed.
3567 */
3568 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3569 {
3570 int rc = pgmMapResolveConflicts(pVM);
3571 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3572 if (rc == VINF_PGM_SYNC_CR3)
3573 {
3574 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3575 return VINF_PGM_SYNC_CR3;
3576 }
3577 }
3578# else
3579 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3580# endif
3581 return VINF_SUCCESS;
3582#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3583}
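/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): SyncCR3 above is typically driven by a pending
 * VMCPU_FF_PGM_SYNC_CR3 force flag via the public PGMSyncCR3() wrapper; the helper
 * below is hypothetical and only restates that calling pattern.
 */
#if 0
static int pgmBthSketchHandleSyncCR3(PVMCPU pVCpu, PCPUMCTX pCtx)
{
    bool const fGlobal = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /* global vs. non-global resync */
    return PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, fGlobal);
}
#endif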
3584
3585
3586
3587
3588#ifdef VBOX_STRICT
3589#ifdef IN_RC
3590# undef AssertMsgFailed
3591# define AssertMsgFailed Log
3592#endif
3593#ifdef IN_RING3
3594# include <VBox/dbgf.h>
3595
3596/**
3597 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3598 *
3599 * @returns VBox status code (VINF_SUCCESS).
3600 * @param cr3 The root of the hierarchy.
3601 * @param cr4 The cr4, only PAE and PSE are currently used.
3602 * @param fLongMode Set if long mode, false if not long mode.
3603 * @param cMaxDepth Number of levels to dump.
3604 * @param pHlp Pointer to the output functions.
3605 */
3606RT_C_DECLS_BEGIN
3607VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3608RT_C_DECLS_END
3609
3610#endif
3611
3612/**
3613 * Checks that the shadow page table is in sync with the guest one.
3614 *
3615 * @returns The number of errors.
3617 * @param pVCpu The VMCPU handle.
3618 * @param cr3 Guest context CR3 register
3619 * @param cr4 Guest context CR4 register
3620 * @param GCPtr Where to start. Defaults to 0.
3621 * @param cb How much to check. Defaults to everything.
3622 */
3623PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3624{
3625#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3626 return 0;
3627#else
3628 unsigned cErrors = 0;
3629 PVM pVM = pVCpu->CTX_SUFF(pVM);
3630 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3631
3632#if PGM_GST_TYPE == PGM_TYPE_PAE
3633 /** @todo currently broken; crashes below somewhere */
3634 AssertFailed();
3635#endif
3636
3637#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3638 || PGM_GST_TYPE == PGM_TYPE_PAE \
3639 || PGM_GST_TYPE == PGM_TYPE_AMD64
3640
3641# if PGM_GST_TYPE == PGM_TYPE_32BIT
3642 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3643# else
3644 bool fBigPagesSupported = true;
3645# endif
3646 PPGMCPU pPGM = &pVCpu->pgm.s;
3647 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3648 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3649# ifndef IN_RING0
3650 RTHCPHYS HCPhys; /* general usage. */
3651# endif
3652 int rc;
3653
3654 /*
3655 * Check that the Guest CR3 and all its mappings are correct.
3656 */
3657 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3658 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3659 false);
3660# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3661# if PGM_GST_TYPE == PGM_TYPE_32BIT
3662 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3663# else
3664 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3665# endif
3666 AssertRCReturn(rc, 1);
3667 HCPhys = NIL_RTHCPHYS;
3668 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3669 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3670# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3671 pgmGstGet32bitPDPtr(pPGM);
3672 RTGCPHYS GCPhys;
3673 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3674 AssertRCReturn(rc, 1);
3675 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3676# endif
3677# endif /* !IN_RING0 */
3678
3679 /*
3680 * Get and check the Shadow CR3.
3681 */
3682# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3683 unsigned cPDEs = X86_PG_ENTRIES;
3684 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3685# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3686# if PGM_GST_TYPE == PGM_TYPE_32BIT
3687 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3688# else
3689 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3690# endif
3691 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3692# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3693 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3694 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3695# endif
3696 if (cb != ~(RTGCPTR)0)
3697 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3698
3699/** @todo call the other two PGMAssert*() functions. */
3700
3701# if PGM_GST_TYPE == PGM_TYPE_AMD64
3702 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3703
3704 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3705 {
3706 PPGMPOOLPAGE pShwPdpt = NULL;
3707 PX86PML4E pPml4eSrc;
3708 PX86PML4E pPml4eDst;
3709 RTGCPHYS GCPhysPdptSrc;
3710
3711 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3712 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3713
3714 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3715 if (!pPml4eDst->n.u1Present)
3716 {
3717 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3718 continue;
3719 }
3720
3721 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3722 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3723
3724 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3725 {
3726 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3727 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3728 cErrors++;
3729 continue;
3730 }
3731
3732 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3733 {
3734 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3735 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3736 cErrors++;
3737 continue;
3738 }
3739
3740 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3741 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3742 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3743 {
3744 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3745 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3746 cErrors++;
3747 continue;
3748 }
3749# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3750 {
3751# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3752
3753# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3754 /*
3755 * Check the PDPTEs too.
3756 */
3757 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3758
3759 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3760 {
3761 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3762 PPGMPOOLPAGE pShwPde = NULL;
3763 PX86PDPE pPdpeDst;
3764 RTGCPHYS GCPhysPdeSrc;
3765# if PGM_GST_TYPE == PGM_TYPE_PAE
3766 X86PDPE PdpeSrc;
3767 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3768 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3769# else
3770 PX86PML4E pPml4eSrcIgn;
3771 X86PDPE PdpeSrc;
3772 PX86PDPT pPdptDst;
3773 PX86PDPAE pPDDst;
3774 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3775
3776 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3777 if (rc != VINF_SUCCESS)
3778 {
3779 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3780 GCPtr += 512 * _2M;
3781 continue; /* next PDPTE */
3782 }
3783 Assert(pPDDst);
3784# endif
3785 Assert(iPDSrc == 0);
3786
3787 pPdpeDst = &pPdptDst->a[iPdpt];
3788
3789 if (!pPdpeDst->n.u1Present)
3790 {
3791 GCPtr += 512 * _2M;
3792 continue; /* next PDPTE */
3793 }
3794
3795 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3796 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3797
3798 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3799 {
3800 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3801 GCPtr += 512 * _2M;
3802 cErrors++;
3803 continue;
3804 }
3805
3806 if (GCPhysPdeSrc != pShwPde->GCPhys)
3807 {
3808# if PGM_GST_TYPE == PGM_TYPE_AMD64
3809 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3810# else
3811 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3812# endif
3813 GCPtr += 512 * _2M;
3814 cErrors++;
3815 continue;
3816 }
3817
3818# if PGM_GST_TYPE == PGM_TYPE_AMD64
3819 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3820 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3821 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3822 {
3823 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3824 GCPtr += 512 * _2M;
3825 cErrors++;
3826 continue;
3827 }
3828# endif
3829
3830# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3831 {
3832# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3833# if PGM_GST_TYPE == PGM_TYPE_32BIT
3834 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3835# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3836 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3837# endif
3838# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3839 /*
3840 * Iterate the shadow page directory.
3841 */
3842 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3843 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3844
3845 for (;
3846 iPDDst < cPDEs;
3847 iPDDst++, GCPtr += cIncrement)
3848 {
3849# if PGM_SHW_TYPE == PGM_TYPE_PAE
3850 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3851# else
3852 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3853# endif
3854 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3855 {
3856 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3857 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3858 {
3859 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3860 cErrors++;
3861 continue;
3862 }
3863 }
3864 else if ( (PdeDst.u & X86_PDE_P)
3865 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3866 )
3867 {
3868 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3869 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3870 if (!pPoolPage)
3871 {
3872 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3873 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3874 cErrors++;
3875 continue;
3876 }
3877 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3878
3879 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3880 {
3881 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3882 GCPtr, (uint64_t)PdeDst.u));
3883 cErrors++;
3884 }
3885
3886 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3887 {
3888 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3889 GCPtr, (uint64_t)PdeDst.u));
3890 cErrors++;
3891 }
3892
3893 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3894 if (!PdeSrc.n.u1Present)
3895 {
3896 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3897 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3898 cErrors++;
3899 continue;
3900 }
3901
3902 if ( !PdeSrc.b.u1Size
3903 || !fBigPagesSupported)
3904 {
3905 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3906# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3907 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3908# endif
3909 }
3910 else
3911 {
3912# if PGM_GST_TYPE == PGM_TYPE_32BIT
3913 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3914 {
3915 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3916 GCPtr, (uint64_t)PdeSrc.u));
3917 cErrors++;
3918 continue;
3919 }
3920# endif
3921 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3922# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3923 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3924# endif
3925 }
3926
3927 if ( pPoolPage->enmKind
3928 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3929 {
3930 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3931 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3932 cErrors++;
3933 }
3934
3935 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3936 if (!pPhysPage)
3937 {
3938 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3939 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3940 cErrors++;
3941 continue;
3942 }
3943
3944 if (GCPhysGst != pPoolPage->GCPhys)
3945 {
3946 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3947 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3948 cErrors++;
3949 continue;
3950 }
3951
3952 if ( !PdeSrc.b.u1Size
3953 || !fBigPagesSupported)
3954 {
3955 /*
3956 * Page Table.
3957 */
3958 const GSTPT *pPTSrc;
3959 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3960 if (RT_FAILURE(rc))
3961 {
3962 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3963 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3964 cErrors++;
3965 continue;
3966 }
3967 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3968 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3969 {
3970 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3971 // (This problem will go away when/if we shadow multiple CR3s.)
3972 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3973 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3974 cErrors++;
3975 continue;
3976 }
3977 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3978 {
3979 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3980 GCPtr, (uint64_t)PdeDst.u));
3981 cErrors++;
3982 continue;
3983 }
3984
3985 /* iterate the page table. */
3986# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3987 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3988 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3989# else
3990 const unsigned offPTSrc = 0;
3991# endif
3992 for (unsigned iPT = 0, off = 0;
3993 iPT < RT_ELEMENTS(pPTDst->a);
3994 iPT++, off += PAGE_SIZE)
3995 {
3996 const SHWPTE PteDst = pPTDst->a[iPT];
3997
3998 /* skip not-present entries. */
3999 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4000 continue;
4001 Assert(PteDst.n.u1Present);
4002
4003 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4004 if (!PteSrc.n.u1Present)
4005 {
4006# ifdef IN_RING3
4007 PGMAssertHandlerAndFlagsInSync(pVM);
4008 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4009# endif
4010 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4011 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4012 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4013 cErrors++;
4014 continue;
4015 }
4016
4017 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4018# if 1 /** @todo sync accessed bit properly... */
4019 fIgnoreFlags |= X86_PTE_A;
4020# endif
4021
4022 /* match the physical addresses */
4023 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4024 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4025
4026# ifdef IN_RING3
4027 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4028 if (RT_FAILURE(rc))
4029 {
4030 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4031 {
4032 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4033 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4034 cErrors++;
4035 continue;
4036 }
4037 }
4038 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4039 {
4040 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4041 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4042 cErrors++;
4043 continue;
4044 }
4045# endif
4046
4047 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4048 if (!pPhysPage)
4049 {
4050# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4051 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4052 {
4053 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4054 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4055 cErrors++;
4056 continue;
4057 }
4058# endif
4059 if (PteDst.n.u1Write)
4060 {
4061 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4062 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4063 cErrors++;
4064 }
4065 fIgnoreFlags |= X86_PTE_RW;
4066 }
4067 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4068 {
4069 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4070 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4071 cErrors++;
4072 continue;
4073 }
4074
4075 /* flags */
4076 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4077 {
4078 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4079 {
4080 if (PteDst.n.u1Write)
4081 {
4082 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4083 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4084 cErrors++;
4085 continue;
4086 }
4087 fIgnoreFlags |= X86_PTE_RW;
4088 }
4089 else
4090 {
4091 if (PteDst.n.u1Present)
4092 {
4093 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4094 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4095 cErrors++;
4096 continue;
4097 }
4098 fIgnoreFlags |= X86_PTE_P;
4099 }
4100 }
4101 else
4102 {
4103 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4104 {
4105 if (PteDst.n.u1Write)
4106 {
4107 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4108 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4109 cErrors++;
4110 continue;
4111 }
4112 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4113 {
4114 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4115 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4116 cErrors++;
4117 continue;
4118 }
4119 if (PteDst.n.u1Dirty)
4120 {
4121 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4122 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4123 cErrors++;
4124 }
4125# if 0 /** @todo sync access bit properly... */
4126 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4127 {
4128 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4129 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4130 cErrors++;
4131 }
4132 fIgnoreFlags |= X86_PTE_RW;
4133# else
4134 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4135# endif
4136 }
4137 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4138 {
4139 /* access bit emulation (not implemented). */
4140 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4141 {
4142 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4143 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4144 cErrors++;
4145 continue;
4146 }
4147 if (!PteDst.n.u1Accessed)
4148 {
4149 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4150 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4151 cErrors++;
4152 }
4153 fIgnoreFlags |= X86_PTE_P;
4154 }
4155# ifdef DEBUG_sandervl
4156 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4157# endif
4158 }
4159
4160 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4161 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4162 )
4163 {
4164 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4165 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4166 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4167 cErrors++;
4168 continue;
4169 }
4170 } /* foreach PTE */
4171 }
4172 else
4173 {
4174 /*
4175 * Big Page.
4176 */
4177 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4178 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4179 {
4180 if (PdeDst.n.u1Write)
4181 {
4182 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4183 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4184 cErrors++;
4185 continue;
4186 }
4187 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4188 {
4189 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4190 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4191 cErrors++;
4192 continue;
4193 }
4194# if 0 /** @todo sync access bit properly... */
4195 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4196 {
4197 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4198 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4199 cErrors++;
4200 }
4201 fIgnoreFlags |= X86_PTE_RW;
4202# else
4203 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4204# endif
4205 }
4206 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4207 {
4208 /* access bit emulation (not implemented). */
4209 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4210 {
4211 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4212 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4213 cErrors++;
4214 continue;
4215 }
4216 if (!PdeDst.n.u1Accessed)
4217 {
4218 AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4219 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4220 cErrors++;
4221 }
4222 fIgnoreFlags |= X86_PTE_P;
4223 }
4224
4225 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4226 {
4227 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4228 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4229 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4230 cErrors++;
4231 }
4232
4233 /* iterate the page table. */
4234 for (unsigned iPT = 0, off = 0;
4235 iPT < RT_ELEMENTS(pPTDst->a);
4236 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4237 {
4238 const SHWPTE PteDst = pPTDst->a[iPT];
4239
4240 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4241 {
4242 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4243 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4244 cErrors++;
4245 }
4246
4247 /* skip not-present entries. */
4248 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4249 continue;
4250
4251 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4252
4253 /* match the physical addresses */
4254 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4255
4256# ifdef IN_RING3
4257 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4258 if (RT_FAILURE(rc))
4259 {
4260 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4261 {
4262 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4263 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4264 cErrors++;
4265 }
4266 }
4267 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4268 {
4269 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4270 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4271 cErrors++;
4272 continue;
4273 }
4274# endif
4275 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4276 if (!pPhysPage)
4277 {
4278# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4279 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4280 {
4281 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4282 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4283 cErrors++;
4284 continue;
4285 }
4286# endif
4287 if (PteDst.n.u1Write)
4288 {
4289 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4290 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4291 cErrors++;
4292 }
4293 fIgnoreFlags |= X86_PTE_RW;
4294 }
4295 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4296 {
4297 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4298 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4299 cErrors++;
4300 continue;
4301 }
4302
4303 /* flags */
4304 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4305 {
4306 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4307 {
4308 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4309 {
4310 if (PteDst.n.u1Write)
4311 {
4312 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4313 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4314 cErrors++;
4315 continue;
4316 }
4317 fIgnoreFlags |= X86_PTE_RW;
4318 }
4319 }
4320 else
4321 {
4322 if (PteDst.n.u1Present)
4323 {
4324 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4325 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4326 cErrors++;
4327 continue;
4328 }
4329 fIgnoreFlags |= X86_PTE_P;
4330 }
4331 }
4332
4333 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4334 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4335 )
4336 {
4337 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4338 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4339 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4340 cErrors++;
4341 continue;
4342 }
4343 } /* for each PTE */
4344 }
4345 }
4346 /* not present */
4347
4348 } /* for each PDE */
4349
4350 } /* for each PDPTE */
4351
4352 } /* for each PML4E */
4353
4354# ifdef DEBUG
4355 if (cErrors)
4356 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4357# endif
4358
4359#endif /* GST == 32BIT, PAE or AMD64 */
4360 return cErrors;
4361
4362#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4363}
4364#endif /* VBOX_STRICT */
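/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): strict builds are assumed to reach AssertCR3 above through a
 * PGMAssertCR3()-style wrapper and to treat any non-zero error count as fatal; the
 * wrapper signature is an assumption.
 */
#if 0
static void pgmBthSketchStrictCheck(PVM pVM, PVMCPU pVCpu, uint64_t cr3, uint64_t cr4)
{
# ifdef VBOX_STRICT
    unsigned const cErrors = PGMAssertCR3(pVM, pVCpu, cr3, cr4);
    AssertMsg(cErrors == 0, ("cErrors=%u\n", cErrors));
# endif
}
#endif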
4365
4366
4367/**
4368 * Sets up the CR3 for shadow paging
4369 *
4370 * @returns Strict VBox status code.
4371 * @retval VINF_SUCCESS.
4372 *
4373 * @param pVCpu The VMCPU handle.
4374 * @param GCPhysCR3 The physical address in the CR3 register.
4375 */
4376PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4377{
4378 PVM pVM = pVCpu->CTX_SUFF(pVM);
4379
4380 /* Update guest paging info. */
4381#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4382 || PGM_GST_TYPE == PGM_TYPE_PAE \
4383 || PGM_GST_TYPE == PGM_TYPE_AMD64
4384
4385 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4386
4387 /*
4388 * Map the page CR3 points at.
4389 */
4390 RTHCPTR HCPtrGuestCR3;
4391 RTHCPHYS HCPhysGuestCR3;
4392 pgmLock(pVM);
4393 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4394 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4395 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4396 /** @todo this needs some reworking wrt. locking. */
4397# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4398 HCPtrGuestCR3 = NIL_RTHCPTR;
4399 int rc = VINF_SUCCESS;
4400# else
4401 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4402# endif
4403 pgmUnlock(pVM);
4404 if (RT_SUCCESS(rc))
4405 {
4406 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4407 if (RT_SUCCESS(rc))
4408 {
4409# ifdef IN_RC
4410 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4411# endif
4412# if PGM_GST_TYPE == PGM_TYPE_32BIT
4413 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4414# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4415 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4416# endif
4417 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4418
4419# elif PGM_GST_TYPE == PGM_TYPE_PAE
4420 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4421 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4422# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4423 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4424# endif
4425 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4426 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4427
4428 /*
4429 * Map the 4 PDs too.
4430 */
4431 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4432 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4433 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4434 {
4435 if (pGuestPDPT->a[i].n.u1Present)
4436 {
4437 RTHCPTR HCPtr;
4438 RTHCPHYS HCPhys;
4439 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4440 pgmLock(pVM);
4441 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4442 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4443 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4444# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4445 HCPtr = NIL_RTHCPTR;
4446 int rc2 = VINF_SUCCESS;
4447# else
4448 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4449# endif
4450 pgmUnlock(pVM);
4451 if (RT_SUCCESS(rc2))
4452 {
4453 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4454 AssertRCReturn(rc, rc);
4455
4456 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4457# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4458 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4459# endif
4460 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4461 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4462# ifdef IN_RC
4463 PGM_INVL_PG(pVCpu, GCPtr);
4464# endif
4465 continue;
4466 }
4467 AssertMsgFailed(("MapCR3 (PAE): rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4468 }
4469
4470 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4471# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4472 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4473# endif
4474 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4475 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4476# ifdef IN_RC
4477 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4478# endif
4479 }
4480
4481# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4482 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4483# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4484 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4485# endif
4486# endif
4487 }
4488 else
4489 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4490 }
4491 else
4492 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4493
4494#else /* prot/real stub */
4495 int rc = VINF_SUCCESS;
4496#endif
4497
4498 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4499# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4500 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4501 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4502 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4503 && PGM_GST_TYPE != PGM_TYPE_PROT))
4504
4505 Assert(!HWACCMIsNestedPagingActive(pVM));
4506
4507 /*
4508 * Update the shadow root page as well since that's not fixed.
4509 */
4510 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4511 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4512 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4513 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4514 PPGMPOOLPAGE pNewShwPageCR3;
4515
4516 pgmLock(pVM);
4517
4518# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4519 if (pPool->cDirtyPages)
4520 pgmPoolResetDirtyPages(pVM);
4521# endif
4522
4523 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4524 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4525 AssertFatalRC(rc);
4526 rc = VINF_SUCCESS;
4527
4528# ifdef IN_RC
4529 /*
4530 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4531 * state will be inconsistent! Flush important things now while
4532 * we still can and then make sure there are no ring-3 calls.
4533 */
4534 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4535 VMMRZCallRing3Disable(pVCpu);
4536# endif
4537
4538 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4539 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4540 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4541# ifdef IN_RING0
4542 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4543 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4544# elif defined(IN_RC)
4545 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4546 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4547# else
4548 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4549 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4550# endif
4551
4552# ifndef PGM_WITHOUT_MAPPINGS
4553 /*
4554 * Apply all hypervisor mappings to the new CR3.
4555 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4556 * make sure we check for conflicts in the new CR3 root.
4557 */
4558# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4559 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4560# endif
4561 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4562 AssertRCReturn(rc, rc);
4563# endif
4564
4565 /* Set the current hypervisor CR3. */
4566 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4567 SELMShadowCR3Changed(pVM, pVCpu);
4568
4569# ifdef IN_RC
4570 /* NOTE: The state is consistent again. */
4571 VMMRZCallRing3Enable(pVCpu);
4572# endif
4573
4574 /* Clean up the old CR3 root. */
4575 if ( pOldShwPageCR3
4576 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4577 {
4578 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4579# ifndef PGM_WITHOUT_MAPPINGS
4580 /* Remove the hypervisor mappings from the shadow page table. */
4581 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4582# endif
4583 /* Mark the page as unlocked; allow flushing again. */
4584 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4585
4586 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4587 }
4588 pgmUnlock(pVM);
4589# endif
4590
4591 return rc;
4592}
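/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): the PAE branch of MapCR3 above keeps the sub-page offset of
 * the guest CR3 because a PAE PDPT is only 32-byte aligned; the helper below merely
 * restates that offset calculation (X86_CR3_PAE_PAGE_MASK is assumed to match
 * GST_CR3_PAGE_MASK for PAE guests).
 */
#if 0
DECLINLINE(RTGCPTR) pgmBthSketchPaePdptMapping(PVM pVM, RTGCPHYS GCPhysCR3)
{
    unsigned const off = GCPhysCR3 & X86_CR3_PAE_PAGE_MASK & PAGE_OFFSET_MASK;
    return pVM->pgm.s.GCPtrCR3Mapping + off;   /* where the guest PDPT gets mapped */
}
#endif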
4593
4594/**
4595 * Unmaps the shadow CR3.
4596 *
4597 * @returns VBox status, no specials.
4598 * @param pVCpu The VMCPU handle.
4599 */
4600PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4601{
4602 LogFlow(("UnmapCR3\n"));
4603
4604 int rc = VINF_SUCCESS;
4605 PVM pVM = pVCpu->CTX_SUFF(pVM);
4606
4607 /*
4608 * Update guest paging info.
4609 */
4610#if PGM_GST_TYPE == PGM_TYPE_32BIT
4611 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4612# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4613 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4614# endif
4615 pVCpu->pgm.s.pGst32BitPdRC = 0;
4616
4617#elif PGM_GST_TYPE == PGM_TYPE_PAE
4618 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4619# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4620 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4621# endif
4622 pVCpu->pgm.s.pGstPaePdptRC = 0;
4623 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4624 {
4625 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4626# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4627 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4628# endif
4629 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4630 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4631 }
4632
4633#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4634 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4635# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4636 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4637# endif
4638
4639#else /* prot/real mode stub */
4640 /* nothing to do */
4641#endif
4642
4643#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4644 /*
4645 * Update shadow paging info.
4646 */
4647# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4648 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4649 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4650
4651# if PGM_GST_TYPE != PGM_TYPE_REAL
4652 Assert(!HWACCMIsNestedPagingActive(pVM));
4653# endif
4654
4655 pgmLock(pVM);
4656
4657# ifndef PGM_WITHOUT_MAPPINGS
4658 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4659 /* Remove the hypervisor mappings from the shadow page table. */
4660 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4661# endif
4662
4663 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4664 {
4665 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4666
4667 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4668
4669# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4670 if (pPool->cDirtyPages)
4671 pgmPoolResetDirtyPages(pVM);
4672# endif
4673
4674 /* Mark the page as unlocked; allow flushing again. */
4675 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4676
4677 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4678 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4679 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4680 pVCpu->pgm.s.pShwPageCR3RC = 0;
4681 pVCpu->pgm.s.iShwUser = 0;
4682 pVCpu->pgm.s.iShwUserTable = 0;
4683 }
4684 pgmUnlock(pVM);
4685# endif
4686#endif /* !IN_RC*/
4687
4688 return rc;
4689}
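/*
 * Editor's illustrative sketch (not part of the original source, kept out of the
 * build with #if 0): after UnmapCR3 above the per-VCPU guest paging pointers are
 * expected to be cleared; a strict caller could spot-check the PAE arrays like this.
 * The helper is hypothetical.
 */
#if 0
static void pgmBthSketchAssertPaeUnmapped(PVMCPU pVCpu)
{
    for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
        Assert(pVCpu->pgm.s.aGCPhysGstPaePDs[i] == NIL_RTGCPHYS);
}
#endif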